37 #ifndef PCL_GPU_DEVICE_EMULATION_HPP_ 38 #define PCL_GPU_DEVICE_EMULATION_HPP_ 40 #include <pcl/gpu/device/warp_reduce.hpp> 48 static __forceinline__ __device__
// ballot: device-side vote helper with a hardware path and an emulated path.
//
// Parameters:
//   predicate  - per-thread condition; treated as a boolean (zero / non-zero).
//   cta_buffer - volatile int scratch array, indexed by threadIdx.x on the
//                emulated path only; it must therefore have a valid element for
//                every calling thread's threadIdx.x. The visible hardware path
//                does not touch it.
//                NOTE(review): presumably block-level (shared) scratch, judging
//                by the name only -- confirm against callers.
//
// Returns:
//   Hardware path: the value of __ballot(predicate).
//   Emulated path: the return statement is not present in this text.
//                NOTE(review): presumably a reduction over cta_buffer via
//                warp_reduce.hpp -- TODO confirm against the original header.
//
// NOTE(review): this text looks like a flattened source listing. The leading
// numbers (37, 38, 40, 48, 50, ...) appear to be original line numbers fused
// into the code, and the function braces, #else and #endif are missing.
// Restore from the upstream header before compiling.
int ballot(
int predicate,
volatile int* cta_buffer)
// Hardware path, selected when __CUDA_ARCH__ >= 200: forward directly to the
// __ballot intrinsic.
// NOTE(review): __ballot is the legacy mask-less vote intrinsic; newer
// architectures require __ballot_sync(mask, predicate) -- confirm the
// toolchain and architectures this header must support.
50 #if __CUDA_ARCH__ >= 200 52 return __ballot(predicate);
// Emulated path (presumably the #else branch -- the directive itself is not
// present in this text): use the x thread index as the slot in cta_buffer.
54 int tid = threadIdx.x;
// Each thread publishes a word with only bit (tid & 31) set when predicate is
// non-zero, or 0 otherwise, so slots whose tid differs in the low five bits
// contribute distinct bits.
55 cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
static __forceinline__ __device__ int ballot(int predicate, volatile int *cta_buffer)
static __device__ __forceinline__ int warp_reduce(volatile int *ptr, const unsigned int tid)