Dragon - C++ API
A Computation Graph Virtual Machine Based Deep Learning Framework
Classes | Namespaces | Macros | Functions | Variables
cuda_device.h File Reference
#include <cuda.h>
#include <cublas.h>
#include <curand.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "core/common.h"

Go to the source code of this file.

Classes

struct  dragon::CUDADeviceProps
 
class  dragon::CUDADeviceGuard
 

Namespaces

 dragon
 

Macros

#define CUDA_MAX_DEVICES   16
 
#define CUDA_VERSION_MIN(major, minor, patch)   (CUDA_VERSION >= (major * 1000 + minor * 100 + patch))
 
#define CUDA_VERSION_MAX(major, minor, patch)   (CUDA_VERSION < (major * 1000 + minor * 100 + patch))
 
#define CUDA_CHECK(condition)
 
#define CUBLAS_CHECK(condition)
 
#define CURAND_CHECK(condition)
 
#define CUDA_1D_KERNEL_LOOP(i, n)
 
#define CUDA_2D_KERNEL_LOOP1(i, n)   for (size_t i = blockIdx.x; i < n; i += gridDim.x)
 
#define CUDA_2D_KERNEL_LOOP2(j, m)   for (size_t j = threadIdx.x; j < m; j += blockDim.x)
 
#define __hdiv   hdiv
 

Functions

int dragon::CUDA_BLOCKS (const int N)
 
int dragon::CUDA_2D_BLOCKS (const int N)
 
int dragon::CUDA_NUM_DEVICES ()
 
int dragon::CUDA_GET_DEVICE ()
 
const cudaDeviceProp & dragon::GetCUDADeviceProp (int device_id)
 
bool dragon::CUDA_TRUE_FP16_AVAILABLE ()
 
bool dragon::TENSOR_CORE_AVAILABLE ()
 

Variables

const int dragon::CUDA_THREADS = 1024
 
const int dragon::CUDA_MAX_BLOCKS = 65535
 

Macro Definition Documentation

◆ __hdiv

#define __hdiv   hdiv

◆ CUBLAS_CHECK

#define CUBLAS_CHECK(condition)
Value:
do { \
cublasStatus_t status = condition; \
CHECK_EQ(status, CUBLAS_STATUS_SUCCESS); \
} while (0)

◆ CUDA_1D_KERNEL_LOOP

#define CUDA_1D_KERNEL_LOOP(i, n)
Value:
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; \
i < n; i += blockDim.x * gridDim.x)

◆ CUDA_2D_KERNEL_LOOP1

#define CUDA_2D_KERNEL_LOOP1(i, n)   for (size_t i = blockIdx.x; i < n; i += gridDim.x)

◆ CUDA_2D_KERNEL_LOOP2

#define CUDA_2D_KERNEL_LOOP2(j, m)   for (size_t j = threadIdx.x; j < m; j += blockDim.x)

◆ CUDA_CHECK

#define CUDA_CHECK(condition)
Value:
do { \
cudaError_t error = condition; \
CHECK_EQ(error, cudaSuccess) \
<< "\n" << cudaGetErrorString(error); \
} while (0)

◆ CUDA_MAX_DEVICES

#define CUDA_MAX_DEVICES   16

◆ CUDA_VERSION_MAX

#define CUDA_VERSION_MAX(major, minor, patch)   (CUDA_VERSION < (major * 1000 + minor * 100 + patch))

◆ CUDA_VERSION_MIN

#define CUDA_VERSION_MIN(major, minor, patch)   (CUDA_VERSION >= (major * 1000 + minor * 100 + patch))

◆ CURAND_CHECK

#define CURAND_CHECK(condition)
Value:
do { \
curandStatus_t status = condition; \
CHECK_EQ(status, CURAND_STATUS_SUCCESS); \
} while (0)