CUDAContext

class CUDAContext

The CUDA device context.

Constructors

inline dragon::CUDAContext::CUDAContext()

Constructor.

inline explicit dragon::CUDAContext::CUDAContext(int device)

Constructor with the device index.

inline explicit dragon::CUDAContext::CUDAContext(const DeviceOption &option)

Constructor with the device option.

Public Properties

cublas_handle

inline cublasHandle_t dragon::CUDAContext::cublas_handle()

Return the cuBLAS handle.

cuda_stream

inline cudaStream_t dragon::CUDAContext::cuda_stream()

Return the current CUDA stream.

cuda_stream

inline cudaStream_t dragon::CUDAContext::cuda_stream(
  int device,
  int stream)

Return the CUDA stream for the specified device and stream index.

cudnn_handle

inline cudnnHandle_t dragon::CUDAContext::cudnn_handle()

Return the cuDNN handle.

curand_generator

inline curandGenerator_t &dragon::CUDAContext::curand_generator()

Return the cuRAND generator.

current_device

static inline int dragon::CUDAContext::current_device()

Return the device index of the current thread.

device

inline int dragon::CUDAContext::device() const

Return the device index.

mutex

static std::mutex &dragon::CUDAContext::mutex()

Return the shared context mutex.

objects

static CUDAObjects &dragon::CUDAContext::objects()

Return the thread-local CUDA objects.

rand_generator

inline std::mt19937 *dragon::CUDAContext::rand_generator()

Return the random generator.

stream

inline int dragon::CUDAContext::stream() const

Return the stream index.

workspace

inline Workspace *dragon::CUDAContext::workspace()

Return the current workspace.

workspace

inline Workspace *dragon::CUDAContext::workspace(
  int device,
  int stream)

Return the workspace for the specified device and stream index.

set_stream

inline void dragon::CUDAContext::set_stream(int stream)

Set the stream index.

Public Functions

Copy

template<typename T, class DestContext, class SrcContext>
inline void dragon::CUDAContext::Copy(
  int n,
  T *dest,
  const T *src)

Copy a typed memory block to the destination.

Delete

static inline void dragon::CUDAContext::Delete(void *ptr)

Deallocate a device memory block.

DeleteHost

static inline void dragon::CUDAContext::DeleteHost(void *ptr)

Deallocate a host memory block.

FinishDeviceComputation

inline void dragon::CUDAContext::FinishDeviceComputation()

Wait for the dispatched computation to complete.

Memset

static inline void dragon::CUDAContext::Memset(
  size_t n,
  void *ptr,
  int value = 0)

Set a memory block to the given value.

MemsetAsync

inline void dragon::CUDAContext::MemsetAsync(
  size_t n,
  void *ptr,
  int value = 0)

Set a memory block to the given value asynchronously.

Memcpy

template<class DestContext, class SrcContext>
static inline void dragon::CUDAContext::Memcpy(
  size_t n,
  void *dest,
  const void *src)

Copy a memory block to the destination.

Memcpy

template<class DestContext, class SrcContext>
static inline void dragon::CUDAContext::Memcpy(
  size_t n,
  void *dest,
  const void *src,
  int device)

Copy a memory block to the destination using the given device.

MemcpyAsync

template<class DestContext, class SrcContext>
inline void dragon::CUDAContext::MemcpyAsync(
  size_t n,
  void *dest,
  const void *src)

Copy a memory block to the destination asynchronously.

New

static inline void *dragon::CUDAContext::New(size_t size)

Allocate a block of device memory.

NewHost

static inline void *dragon::CUDAContext::NewHost(size_t size)

Allocate a block of host memory.

SwitchToDevice

inline void dragon::CUDAContext::SwitchToDevice(int stream_id = 0)

Switch to the device and select the given stream in the current thread.

SynchronizeStream

static inline void dragon::CUDAContext::SynchronizeStream(cudaStream_t stream)

Synchronize the given stream.