|
CUB
|
#include <iterator>
#include "../ns_wrapper.cuh"
#include "../macro_utils.cuh"
#include "../thread/thread_load.cuh"
#include "../type_utils.cuh"
#include "../vector_type.cuh"
#include "block_exchange.cuh"
Classes | |
| class | cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER > |
| BlockLoad provides data movement operations for reading block-arranged data from global memory. | |
Namespaces | |
| namespace | cub |
| CUB namespace. | |
Enumerations | |
| enum | cub::BlockLoadPolicy { cub::BLOCK_LOAD_DIRECT, cub::BLOCK_LOAD_VECTORIZE, cub::BLOCK_LOAD_TRANSPOSE } |
| Tuning policy for cub::BlockLoad. More... | |
Functions | |
Direct threadblock loads (blocked arrangement) | |
| template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator > | |
| __device__ __forceinline__ void | cub::BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly using the specified cache modifier. More... | |
| template<typename T , int ITEMS_PER_THREAD, typename InputIterator > | |
| __device__ __forceinline__ void | cub::BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly. More... | |
| template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range. More... | |
| template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly, guarded by range. More... | |
| template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More... | |
| template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly, guarded by range, with assignment for out-of-bound elements. More... | |
Direct threadblock loads (striped arrangement) | |
| template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator > | |
| __device__ __forceinline__ void | cub::BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x) |
| Load striped tile directly using the specified cache modifier. More... | |
| template<typename T , int ITEMS_PER_THREAD, typename InputIterator > | |
| __device__ __forceinline__ void | cub::BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x) |
| Load striped tile directly. More... | |
| template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x) |
| Load striped tile directly using the specified cache modifier, guarded by range. More... | |
| template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x) |
| Load striped tile directly, guarded by range. More... | |
| template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x) |
| Load striped tile directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More... | |
| template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT > | |
| __device__ __forceinline__ void | cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x) |
| Load striped tile directly, guarded by range, with assignment for out-of-bound elements. More... | |
Threadblock vectorized loads (blocked arrangement) | |
| template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD> | |
| __device__ __forceinline__ void | cub::BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly using the specified cache modifier. More... | |
| template<typename T , int ITEMS_PER_THREAD> | |
| __device__ __forceinline__ void | cub::BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD]) |
| Load a tile of items across a threadblock directly. More... | |
Operations for reading global tiles of data into the threadblock (in blocked arrangement across threads).
1.8.3.1