CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
Classes | Namespaces | Enumerations
block_load.cuh File Reference
#include <iterator>
#include "../ns_wrapper.cuh"
#include "../macro_utils.cuh"
#include "../thread/thread_load.cuh"
#include "../type_utils.cuh"
#include "../vector_type.cuh"
#include "block_exchange.cuh"

Classes

class  cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >
 BlockLoad provides data movement operations for reading block-arranged data from global memory.

[Image: block_load_logo.png]
More...
 

Namespaces

namespace  cub
 CUB namespace.
 

Enumerations

enum  cub::BlockLoadPolicy { cub::BLOCK_LOAD_DIRECT, cub::BLOCK_LOAD_VECTORIZE, cub::BLOCK_LOAD_TRANSPOSE }
 Tuning policy for cub::BlockLoad. More...
 

Functions

Direct threadblock loads (blocked arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly, guarded by range. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly, guarded by range, with assignment for out-of-bound elements. More...
 
Direct threadblock loads (striped arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly, guarded by range. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly, guarded by range, with assignment for out-of-bound elements. More...
 
Threadblock vectorized loads (blocked arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly. More...
 

Detailed Description

Operations for reading global tiles of data into the threadblock (in blocked arrangement across threads).