Dragon - C++ API
A Computation Graph Virtual Machine Based Deep Learning Framework
Functions
dragon::kernel Namespace Reference

Functions

template<typename T , class Context >
void Dropout (const int count, const float prob, const float scale, const T *x, uint32_t *mask32, uint8_t *mask8, T *y, Context *ctx)
 
template<typename Tx , typename Tm , class Context >
void ApplyMask (const int count, const float scale, const Tx *x, const Tm *mask, Tx *y, Context *ctx)
 
template<typename T , class Context >
void DropPath (const int rows, const int cols, const float scale, const T *x, const float *mask, T *y, Context *ctx)
 
template<typename T , class Context >
void Elu (const int count, const float alpha, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void EluGrad (const int count, const float alpha, const T *dy, const T *y, T *dx, Context *ctx)
 
template<typename T , class Context >
void PRelu (const int count, const int channels, const int dim, const bool channel_shared, const string &data_format, const T *x, const T *w, T *y, Context *ctx)
 
template<typename T , class Context >
void PReluGrad (const int count, const int channels, const int dim, const bool channel_shared, const string &data_format, const T *dy, const T *x, const T *w, T *dx, Context *ctx)
 
template<typename T , class Context >
void PReluWGrad (const int rows, const int row_offset, const int channels, const int dim, const bool channel_shared, const string &data_format, const T *dy, const T *x, const T *multiplier, T *bcast_dw, T *dw, Context *ctx)
 
template<typename T , class Context >
void Relu (const int count, const float slope, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void ReluGrad (const int count, const float slope, const T *dy, const T *y, T *dx, Context *ctx)
 
template<typename T , class Context >
void SElu (const int count, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void SEluGrad (const int count, const T *dy, const T *y, T *dx, Context *ctx)
 
template<typename T , class Context >
void Sigmoid (const int count, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void SigmoidGrad (const int count, const T *dy, const T *y, T *dx, Context *ctx)
 
template<typename T , class Context >
void Softmax (const int outer_dim, const int axis_dim, const int inner_dim, const T *multiplier, const T *x, T *scale, T *y, Context *ctx)
 
template<typename T , class Context >
void SoftmaxGrad (const int outer_dim, const int axis_dim, const int inner_dim, const T *multiplier, const T *dy, const T *y, T *scale, T *dx, Context *ctx)
 
template<typename T , class Context >
void Tanh (const int count, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void TanhGrad (const int count, const T *dy, const T *y, T *dx, Context *ctx)
 
template<typename T , class Context >
void Affine (const int outer_dim, const int axis_dim, const int inner_dim, const T *x, const T *alpha, const T *beta, T *y, Context *ctx)
 
template<typename T , class Context >
void AffineGrad (const int outer_dim, const int axis_dim, const int inner_dim, const T *dy, const T *alpha, T *dx, Context *ctx)
 
template<typename T , class Context >
void Clip (const int count, const float low, const float high, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void ClipGrad (const int count, const float low, const float high, const T *x, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void Maximum (const int count, const T *a, const T *b, T *y, Context *ctx)
 
template<typename T , class Context >
void BroadcastMaximum (const int count, const T *a, const T b, T *y, Context *ctx)
 
template<typename T , class Context >
void MaximumGrad (const int count, const T *a, const T *b, const T *dy, T *da, T *db, Context *ctx)
 
template<typename T , class Context >
void BroadcastMaximumGrad (const int count, const T *a, const T b, const T *dy, T *da, T *db, Context *ctx)
 
template<typename T , class Context >
void Minimum (const int count, const T *a, const T *b, T *y, Context *ctx)
 
template<typename T , class Context >
void BroadcastMinimum (const int count, const T *a, const T b, T *y, Context *ctx)
 
template<typename T , class Context >
void MinimumGrad (const int count, const T *a, const T *b, const T *dy, T *da, T *db, Context *ctx)
 
template<typename T , class Context >
void BroadcastMinimumGrad (const int count, const T *a, const T b, const T *dy, T *da, T *db, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void Moments (const int ndims, const int *dims, const int naxes, const int *axes, const Tx *x, Ty *mean, Ty *var, Context *ctx)
 
template<typename T , class Context >
void Arange (const int count, const int start, const int step, T *y, Context *ctx)
 
template<typename T , class Context >
void ArgMax (const int outer_dim, const int inner_dim, const int axis_dim, const int top_k, const T *x, int64_t *indices, T *values, Context *ctx)
 
template<typename T , class Context >
void ArgMin (const int outer_dim, const int inner_dim, const int axis_dim, const int top_k, const T *x, int64_t *indices, T *values, Context *ctx)
 
template<typename T , class Context >
void ChannelShuffle (const int outer_dim, const int inner_dim, const int axis_dim, const int group, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void Concat (const int outer_dim, const int inner_dim, const int axis_dim, const int cat_dim, const int cat_ofs, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void Crop (const int count, const int ndims, const int *x_strides, const int *y_dims, const int *starts, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void CropGrad (const int count, const int ndims, const int *x_strides, const int *y_dims, const int *starts, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void IndexSelect (const int outer_dim, const int inner_dim, const int axis_dim, const int num_indices, const int64_t *indices, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void IndexSelectGrad (const int outer_dim, const int inner_dim, const int axis_dim, const int num_indices, const int64_t *indices, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void MaskedSelect (const int count, const uint8_t *mask, const T *x, Tensor *indices, Tensor *scratch, Tensor *y, Context *ctx)
 
template<typename T , class Context >
void MaskedSelectGrad (const int count, const int num_indices, const int64_t *indices, const T *dy, T *dx, Context *ctx)
 
template<class Context >
void UnravelIndex (const int count, const int ndims, const int *dims, const int64_t *x, int64_t *y, Context *ctx)
 
template<typename T , class Context >
void ConstPad (const int count, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const int *l_pads, const float value, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void ReflectPad (const int count, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const int *l_pads, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void EdgePad (const int count, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const int *l_pads, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void OneHot (const int count, const int depth, const int on_value, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void ReduceSum (const int ndims, const int *dims, const int naxes, const int *axes, const float scale, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void ReduceSumGrad (const int count, const int ndims, const int *x_dims, const int *y_dims, const int *y_strides, const float scale, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void Repeat (const int outer_dim, const int inner_dim, const int axis_dim, const int repeats, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void RepeatGrad (const int outer_dim, const int inner_dim, const int axis_dim, const int repeats, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void Slice (const int outer_dim, const int inner_dim, const int axis_dim, const int slice_dim, const int slice_ofs, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void SliceGrad (const int outer_dim, const int inner_dim, const int axis_dim, const int slice_dim, const int slice_ofs, const T *dy, T *x, Context *ctx)
 
template<typename T , class Context >
void Tile (const int count, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void TileGrad (const int rows, const int cols, const int multiple, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void Transpose (const int count, const int ndims, const int *x_strides, const int *y_dims, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void TransposeGrad (const int count, const int ndims, const int *x_strides, const int *y_dims, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void Where (const int count, const uint8_t *mask, const T *a, const T *b, T *y, Context *ctx)
 
template<typename T , class Context >
void WhereGrad (const int count, const uint8_t *mask, const T *dy, T *da, T *db, Context *ctx)
 
template<typename T , class Context >
void Assign (const int count, const int ndims, const int *x_dims, const int *y_strides, const int *starts, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void NotZero (const int count, const T *x, bool *y, Context *ctx)
 
template<typename T , class Context >
void Equal (const int count, const T *a, const T *b, bool *y, Context *ctx)
 
template<typename T , class Context >
void NotEqual (const int count, const T *a, const T *b, bool *y, Context *ctx)
 
template<typename T , class Context >
void Less (const int count, const T *a, const T *b, bool *y, Context *ctx)
 
template<typename T , class Context >
void LessEqual (const int count, const T *a, const T *b, bool *y, Context *ctx)
 
template<typename T , class Context >
void Greater (const int count, const T *a, const T *b, bool *y, Context *ctx)
 
template<typename T , class Context >
void GreaterEqual (const int count, const T *a, const T *b, bool *y, Context *ctx)
 
template<typename T , class Context >
void AbsGrad (const int count, const T *dy, T *dx, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void NLLLoss (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *log_prob, const Ty *target, Tx *loss, int *flag, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void NLLLossGrad (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *log_prob, const Ty *target, Tx *dx, int *flag, Context *ctx)
 
template<typename T , class Context >
void SigmoidCrossEntropy (const int count, const T *logit, const T *target, T *loss, int *flag, Context *ctx)
 
template<typename T , class Context >
void SigmoidCrossEntropyGrad (const int count, const T *logit, const T *target, T *dlogit, int *flag, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void SigmoidFocalLoss (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const Tx *logit, const Ty *target, Tx *loss, int *flag, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void SigmoidFocalLossGrad (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const Tx *logit, const Ty *target, Tx *dlogit, int *flag, Context *ctx)
 
template<typename T , class Context >
void SmoothL1 (const int count, const float beta, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void SmoothL1Grad (const int count, const float beta, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void SoftmaxCrossEntropy (const int count, const T *prob, const T *targets, T *losses, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void SoftmaxFocalLoss (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const Tx *prob, const Ty *labels, Tx *losses, int *flags, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void SoftmaxFocalLossGrad (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const Tx *prob, const Ty *labels, Tx *dx, int *flags, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void SparseSoftmaxCrossEntropy (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *prob, const Ty *target, Tx *loss, int *flag, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void SparseSoftmaxCrossEntropyGrad (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *prob, const Ty *target, Tx *dx, int *flag, Context *ctx)
 
template<typename Ta , typename Tb , class Context >
void TypeA2B (const int count, const Ta *a, Tb *b, Context *ctx)
 
template<typename T , class Context >
void GradientTwoSum (const int count, const T *dy1, const T *dy2, T *dx, Context *ctx)
 
template<typename Tx , typename Ty , class Context >
void ImageData (const int N, const int C, const int H, const int W, const string &data_format, const float *mean, const float *std, const Tx *x, Ty *y, Context *ctx)
 
template<typename Tx , typename Tp , class Context >
void BatchNormBackwardTraining (const int N, const int C, const int S, const string &data_format, const Tx *x, const Tp *mu, const Tp *rsig, const Tp *gamma, const Tx *dy, Tp *ds, Tp *db, Tx *dx, Tp *dgamma, Tp *dbeta, Context *ctx)
 
template<typename Tx , typename Tp , class Context >
void BatchNormBackwardInference (const int N, const int C, const int S, const string &data_format, const Tx *x, const Tp *mu, const Tp *rsig, const Tp *gamma, const Tx *dy, Tx *dx, Tp *dgamma, Tp *dbeta, Context *ctx)
 
template<typename Tx , typename Tp , class Context >
void GroupNormForward (const int N, const int G, const int D, const int S, const string &data_format, const Tx *x, const Tp *mu, const Tp *rsig, const Tp *gamma, const Tp *beta, Tp *scale, Tp *bias, Tx *y, Context *ctx)
 
template<typename Tx , typename Tp , class Context >
void GroupNormBackward (const int N, const int G, const int D, const int S, const string &data_format, const Tx *x, const Tp *mu, const Tp *rsig, const Tp *gamma, const Tx *dy, Tp *ds, Tp *db, Tx *dx, Tp *dgamma, Tp *dbeta, Context *ctx)
 
template<typename T , class Context >
void LSTMCell (const int N, const int C, const T *cx, T *actx, T *c, T *h, Context *ctx)
 
template<typename T , class Context >
void LSTMCellGrad (const int N, const int C, const T *cx, const T *actx, const T *c, const T *dc, const T *dh, T *dcx, T *dx, Context *ctx)
 
template<typename T , class Context >
void AdamUpdate (const int count, const float lr, const float beta1, const float beta2, const float eps, T *g, T *m, T *v, Context *ctx)
 
template<typename T , class Context >
void NesterovUpdate (const int count, const float lr, const float momentum, T *g, T *h, Context *ctx)
 
template<typename T , class Context >
void RMSPropUpdate (const int count, const float lr, const float decay, const float eps, T *g, T *h, Context *ctx)
 
template<typename T , class Context >
void SGDUpdate (const int count, const float lr, const float momentum, T *g, T *h, Context *ctx)
 
template<typename T , class Context >
void MixedPrecL2Decay (const int count, const float alpha, const T *w, float *dx, Context *ctx)
 
template<typename T , class Context >
void MixedPrecUpdate (const int count, const float *updates, T *w, Context *ctx)
 
template<typename T , class Context >
void BiasAdd (const int outer_dim, const int axis_dim, const int inner_dim, const string &data_format, const T *bias, const T *multiplier, T *y, Context *ctx)
 
template<typename T , class Context >
void BilinearResize (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void BilinearResizeGrad (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void Im2Col2d (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const T *im, T *col, Context *ctx)
 
template<typename T , class Context >
void Col2Im2d (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const T *col, T *im, Context *ctx)
 
template<typename T , class Context >
void DepthwiseConv2d (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const T *x, const T *w, T *y, Context *ctx)
 
template<typename T , class Context >
void DepthwiseConv2dGrad (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const T *dy, const T *d, T *dx, Context *ctx)
 
template<typename T , class Context >
void DepthwiseConv2dWGrad (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const T *dy, const T *x, T *dw, Context *ctx)
 
template<class Context >
void DropBlock2d (const int N, const int C, const int H, const int W, const int seed_h, const int seed_w, const int block_size, const float gamma, const string &data_format, uint32_t *seed, int *mask, Context *ctx)
 
template<typename T , class Context >
void NNResize (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void NNResizeGrad (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void MaxPool2d (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const T *x, int *mask, T *y, Context *ctx)
 
template<typename T , class Context >
void AvgPool2d (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const T *x, T *y, Context *ctx)
 
template<typename T , class Context >
void MaxPool2dGrad (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const T *dy, const int *mask, T *dx, Context *ctx)
 
template<typename T , class Context >
void AvgPool2dGrad (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const T *dy, T *dx, Context *ctx)
 
template<typename T , class Context >
void ROIPool (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const T *x, const float *rois, int *mask, T *y, Context *ctx)
 
template<typename T , class Context >
void ROIPoolGrad (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const T *dy, const T *rois, const int *mask, T *dx, Context *ctx)
 
template<typename T , class Context >
void ROIAlign (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const int sampling_ratio, const T *x, const float *rois, T *y, Context *ctx)
 
template<typename T , class Context >
void ROIAlignGrad (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const int sampling_ratio, const T *dy, const float *rois, T *dx, Context *ctx)
 
template<>
void Dropout< float, CPUContext > (const int count, const float prob, const float scale, const float *x, uint32_t *mask32, uint8_t *mask8, float *y, CPUContext *ctx)
 
template<>
void Dropout< float16, CPUContext > (const int count, float prob, float scale, const float16 *x, uint32_t *mask32, uint8_t *mask8, float16 *y, CPUContext *ctx)
 
template<typename Tx , typename Tm >
void _ApplyMask (const int count, const float scale, const Tx *x, const Tm *mask, Tx *y)
 
template<>
void ApplyMask< float, uint8_t, CPUContext > (const int count, const float scale, const float *x, const uint8_t *mask, float *y, CPUContext *ctx)
 
template<>
void ApplyMask< float16, uint8_t, CPUContext > (const int count, const float scale, const float16 *x, const uint8_t *mask, float16 *y, CPUContext *ctx)
 
template<>
void DropPath< float, CPUContext > (const int rows, const int cols, const float scale, const float *x, const float *mask, float *y, CPUContext *ctx)
 
template<>
void DropPath< float16, CPUContext > (const int rows, const int cols, const float scale, const float16 *x, const float *mask, float16 *y, CPUContext *ctx)
 
template<>
void Elu< float, CPUContext > (const int count, const float alpha, const float *x, float *y, CPUContext *ctx)
 
template<>
void EluGrad< float, CPUContext > (const int count, const float alpha, const float *dy, const float *y, float *dx, CPUContext *ctx)
 
template<>
void PRelu< float, CPUContext > (const int count, const int channels, const int dim, const bool channel_shared, const string &data_format, const float *x, const float *w, float *y, CPUContext *ctx)
 
template<>
void PReluGrad< float, CPUContext > (const int count, const int channels, const int dim, const bool channel_shared, const string &data_format, const float *dy, const float *x, const float *w, float *dx, CPUContext *ctx)
 
template<>
void PReluWGrad< float, CPUContext > (const int rows, const int row_offset, const int channels, const int dim, const bool channel_shared, const string &data_format, const float *dy, const float *x, const float *multiplier, float *bcast_dw, float *dw, CPUContext *ctx)
 
template<>
void Relu< float, CPUContext > (const int count, const float slope, const float *x, float *y, CPUContext *ctx)
 
template<>
void Relu< float16, CPUContext > (const int count, const float slope, const float16 *x, float16 *y, CPUContext *ctx)
 
template<>
void ReluGrad< float, CPUContext > (const int count, const float slope, const float *dy, const float *y, float *dx, CPUContext *ctx)
 
template<>
void ReluGrad< float16, CPUContext > (const int count, const float slope, const float16 *dy, const float16 *y, float16 *dx, CPUContext *ctx)
 
template<>
void SElu< float, CPUContext > (const int count, const float *x, float *y, CPUContext *ctx)
 
template<>
void SElu< float16, CPUContext > (const int count, const float16 *x, float16 *y, CPUContext *ctx)
 
template<>
void SEluGrad< float, CPUContext > (const int count, const float *dy, const float *y, float *dx, CPUContext *ctx)
 
template<>
void SEluGrad< float16, CPUContext > (const int count, const float16 *dy, const float16 *y, float16 *dx, CPUContext *ctx)
 
template<typename T >
T _Sigmoid (T x)
 
template<>
void Sigmoid< float, CPUContext > (const int count, const float *x, float *y, CPUContext *ctx)
 
template<>
void SigmoidGrad< float, CPUContext > (const int count, const float *dy, const float *y, float *dx, CPUContext *ctx)
 
template<>
void Softmax< float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float *multiplier, const float *x, float *scale, float *y, CPUContext *ctx)
 
template<typename T >
void _SoftmaxGrad (const int outer_dim, const int axis_dim, const int inner_dim, const T *dy, const T *y, T *dx)
 
template<>
void SoftmaxGrad< float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float *multiplier, const float *dy, const float *y, float *scale, float *dx, CPUContext *ctx)
 
template<>
void Tanh< float, CPUContext > (const int count, const float *x, float *y, CPUContext *ctx)
 
template<>
void TanhGrad< float, CPUContext > (const int count, const float *dy, const float *y, float *dx, CPUContext *ctx)
 
template<>
void Affine< float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float *x, const float *alpha, const float *beta, float *y, CPUContext *ctx)
 
template<>
void Affine< float16, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float16 *x, const float16 *alpha, const float16 *beta, float16 *y, CPUContext *ctx)
 
template<>
void AffineGrad< float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float *dy, const float *alpha, float *dx, CPUContext *ctx)
 
template<>
void AffineGrad< float16, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float16 *dy, const float16 *alpha, float16 *dx, CPUContext *ctx)
 
template<typename T >
void _Clip (const int count, const T low, const T high, const T *x, T *y)
 
template<typename T >
void _ClipGrad (const int count, const T low, const T high, const T *x, const T *dy, T *dx)
 
 DEFINE_CLIP_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_CLIP_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_CLIP_KERNEL_LAUNCHER (int)
 
 DEFINE_CLIP_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_CLIP_KERNEL_LAUNCHER (float)
 
 DEFINE_CLIP_KERNEL_LAUNCHER (double)
 
 DEFINE_CLIP_GRAD_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_CLIP_GRAD_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_CLIP_GRAD_KERNEL_LAUNCHER (int)
 
 DEFINE_CLIP_GRAD_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_CLIP_GRAD_KERNEL_LAUNCHER (float)
 
 DEFINE_CLIP_GRAD_KERNEL_LAUNCHER (double)
 
template<>
void Clip< float16, CPUContext > (const int count, const float low, const float high, const float16 *x, float16 *y, CPUContext *ctx)
 
template<>
void ClipGrad< float16, CPUContext > (const int count, const float low, const float high, const float16 *x, const float16 *dy, float16 *y, CPUContext *ctx)
 
template<typename T >
void _Maximum (const int count, const T *a, const T *b, T *y)
 
template<typename T >
void _BroadcastMaximum (const int count, const T *a, const T b, T *y)
 
template<typename T >
void _MaximumGrad (const int count, const T *a, const T *b, const T *dy, T *da, T *db)
 
template<typename T >
void _BroadcastMaximumGrad (const int count, const T *a, const T b, const T *dy, T *da, T *db)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (Maximum, int8_t, int8_t *)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (Maximum, uint8_t, uint8_t *)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (Maximum, int, int *)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (Maximum, int64_t, int64_t *)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (Maximum, float, float *)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (Maximum, double, double *)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (BroadcastMaximum, int8_t, int8_t)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (BroadcastMaximum, uint8_t, uint8_t)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (BroadcastMaximum, int, int)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (BroadcastMaximum, int64_t, int64_t)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (BroadcastMaximum, float, float)
 
 DEFINE_MAXIMUM_KERNEL_LAUNCHER (BroadcastMaximum, double, double)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (MaximumGrad, int8_t, int8_t *)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (MaximumGrad, uint8_t, uint8_t *)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (MaximumGrad, int, int *)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (MaximumGrad, int64_t, int64_t *)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (MaximumGrad, float, float *)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (MaximumGrad, double, double *)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMaximumGrad, int8_t, int8_t)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMaximumGrad, uint8_t, uint8_t)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMaximumGrad, int, int)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMaximumGrad, int64_t, int64_t)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMaximumGrad, float, float)
 
 DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMaximumGrad, double, double)
 
template<>
void Maximum< float16, CPUContext > (const int count, const float16 *a, const float16 *b, float16 *y, CPUContext *ctx)
 
template<>
void BroadcastMaximum< float16, CPUContext > (const int count, const float16 *a, const float16 b, float16 *y, CPUContext *ctx)
 
template<>
void MaximumGrad< float16, CPUContext > (const int count, const float16 *a, const float16 *b, const float16 *dy, float16 *da, float16 *db, CPUContext *ctx)
 
template<>
void BroadcastMaximumGrad< float16, CPUContext > (const int count, const float16 *a, const float16 b, const float16 *dy, float16 *da, float16 *db, CPUContext *ctx)
 
template<typename T >
void _Minimum (const int count, const T *a, const T *b, T *y)
 
template<typename T >
void _BroadcastMinimum (const int count, const T *a, const T b, T *y)
 
template<typename T >
void _MinimumGrad (const int count, const T *a, const T *b, const T *dy, T *da, T *db)
 
template<typename T >
void _BroadcastMinimumGrad (const int count, const T *a, const T b, const T *dy, T *da, T *db)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (Minimum, int8_t, int8_t *)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (Minimum, uint8_t, uint8_t *)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (Minimum, int, int *)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (Minimum, int64_t, int64_t *)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (Minimum, float, float *)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (Minimum, double, double *)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (BroadcastMinimum, int8_t, int8_t)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (BroadcastMinimum, uint8_t, uint8_t)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (BroadcastMinimum, int, int)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (BroadcastMinimum, int64_t, int64_t)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (BroadcastMinimum, float, float)
 
 DEFINE_MINIMUM_KERNEL_LAUNCHER (BroadcastMinimum, double, double)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (MinimumGrad, int8_t, int8_t *)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (MinimumGrad, uint8_t, uint8_t *)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (MinimumGrad, int, int *)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (MinimumGrad, int64_t, int64_t *)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (MinimumGrad, float, float *)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (MinimumGrad, double, double *)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMinimumGrad, int8_t, int8_t)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMinimumGrad, uint8_t, uint8_t)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMinimumGrad, int, int)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMinimumGrad, int64_t, int64_t)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMinimumGrad, float, float)
 
 DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER (BroadcastMinimumGrad, double, double)
 
template<>
void Minimum< float16, CPUContext > (const int count, const float16 *a, const float16 *b, float16 *y, CPUContext *ctx)
 
template<>
void BroadcastMinimum< float16, CPUContext > (const int count, const float16 *a, const float16 b, float16 *y, CPUContext *ctx)
 
template<>
void MinimumGrad< float16, CPUContext > (const int count, const float16 *a, const float16 *b, const float16 *dy, float16 *da, float16 *db, CPUContext *ctx)
 
template<>
void BroadcastMinimumGrad< float16, CPUContext > (const int count, const float16 *a, const float16 b, const float16 *dy, float16 *da, float16 *db, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _ColwiseMoments (const int rows, const int cols, const Tx *x, Ty *mean, Ty *var)
 
template<typename Tx , typename Ty >
void _RowwiseMoments (const int rows, const int cols, const Tx *x, Ty *mean, Ty *var)
 
template<typename Tx , typename Ty >
void _GenericMoments (const int outer_dim, const int inner_dim, const int ndim, const int *x_strides, const int *y_dims, const Tx *x, Ty *mean, Ty *var)
 
template<typename Tx , typename Ty >
void _GenericMomentsLauncher (const int outer_dim, const int inner_dim, const int ndim, const int *dims, const int *axes, const Tx *x, Ty *mean, Ty *var)
 
template<typename Tx , typename Ty >
void _Moments (const int num_dims, const int *dims, const int num_axes, const int *axes, const Tx *x, Ty *mean, Ty *var, CPUContext *ctx)
 
 DEFINE_MOMENTS_KERNEL_LAUNCHER (int8_t, float)
 
 DEFINE_MOMENTS_KERNEL_LAUNCHER (uint8_t, float)
 
 DEFINE_MOMENTS_KERNEL_LAUNCHER (int, float)
 
 DEFINE_MOMENTS_KERNEL_LAUNCHER (int64_t, float)
 
 DEFINE_MOMENTS_KERNEL_LAUNCHER (float, float)
 
 DEFINE_MOMENTS_KERNEL_LAUNCHER (double, double)
 
template<>
void Moments< float16, float, CPUContext > (const int num_dims, const int *dims, const int num_axes, const int *axes, const float16 *x, float *mean, float *var, CPUContext *ctx)
 
template<typename T >
void _Arange (const int count, const int start, const int step, T *y)
 
 DEFINE_ARANGE_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_ARANGE_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_ARANGE_KERNEL_LAUNCHER (int)
 
 DEFINE_ARANGE_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_ARANGE_KERNEL_LAUNCHER (float)
 
 DEFINE_ARANGE_KERNEL_LAUNCHER (double)
 
template<>
void Arange< float16, CPUContext > (const int count, const int start, const int step, float16 *y, CPUContext *ctx)
 
template<typename T >
void _ArgMax (const int outer_dim, const int inner_dim, const int axis_dim, const int top_k, const T *x, int64_t *indices, T *values)
 
template<typename T >
void _ArgMin (const int outer_dim, const int inner_dim, const int axis_dim, const int top_k, const T *x, int64_t *indices, T *values)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMax, bool)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMax, int8_t)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMax, uint8_t)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMax, int)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMax, int64_t)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMax, float)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMax, double)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMin, bool)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMin, int8_t)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMin, uint8_t)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMin, int)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMin, int64_t)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMin, float)
 
 DEFINE_ARGREDUCE_KERNEL_LAUNCHER (ArgMin, double)
 
template<>
void ArgMax< float16, CPUContext > (const int outer_dim, const int inner_dim, const int axis_dim, const int top_k, const float16 *x, int64_t *indices, float16 *values, CPUContext *ctx)
 
template<>
void ArgMin< float16, CPUContext > (const int outer_dim, const int inner_dim, const int axis_dim, const int top_k, const float16 *x, int64_t *indices, float16 *values, CPUContext *ctx)
 
template<typename T >
void _ChannelShuffle (const int outer_dim, const int inner_dim, const int G, const int K, const T *x, T *y, CPUContext *ctx)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (bool)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (int)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (float16)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (float)
 
 DEFINE_SHUFFLE_KERNEL_LAUNCHER (double)
 
template<typename T >
void _Concat (const int outer_dim, const int inner_dim, const int axis_dim, const int cat_dim, const int cat_ofs, const T *x, T *y, CPUContext *ctx)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, bool)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, int8_t)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, uint8_t)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, int)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, int64_t)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, float16)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, float)
 
 DEFINE_CONCAT_KERNEL_LAUNCHER (Concat, double)
 
template<typename T >
void _Crop (const int count, const int ndims, const int *x_strides, const int *y_dims, const int *starts, const T *x, T *y)
 
template<typename T >
void _CropGrad (const int count, const int ndims, const int *x_strides, const int *y_dims, const int *starts, const T *dy, T *dx)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, bool)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, int8_t)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, uint8_t)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, int)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, int64_t)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, float16)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, float)
 
 DEFINE_CROP_KERNEL_LAUNCHER (Crop, double)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, bool)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, int8_t)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, uint8_t)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, int)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, int64_t)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, float16)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, float)
 
 DEFINE_CROP_KERNEL_LAUNCHER (CropGrad, double)
 
template<typename T >
void _IndexSelect (const int outer_dim, const int inner_dim, const int axis_dim, const int num_indices, const int64_t *indices, const T *x, T *y, CPUContext *ctx)
 
template<typename T >
void _IndexSelectGrad (const int outer_dim, const int inner_dim, const int axis_dim, const int num_indices, const int64_t *indices, const T *dy, T *dx, CPUContext *ctx)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, bool)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, int8_t)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, uint8_t)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, int)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, int64_t)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, float16)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, float)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelect, double)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelectGrad, int8_t)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelectGrad, uint8_t)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelectGrad, int)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelectGrad, int64_t)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelectGrad, float16)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelectGrad, float)
 
 DEFINE_INDEX_KERNEL_LAUNCHER (IndexSelectGrad, double)
 
template<typename T >
void _MaskedSelectGrad (const int num_indices, const int64_t *indices, const T *dy, T *dx)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (bool)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (int)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (float16)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (float)
 
 DEFINE_MASKED_KERNEL_LAUNCHER (double)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (bool)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (int)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (float16)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (float)
 
 DEFINE_MASKED_GRAD_KERNEL_LAUNCHER (double)
 
template<>
void UnravelIndex< CPUContext > (const int count, const int ndims, const int *dims, const int64_t *x, int64_t *y, CPUContext *ctx)
 
template<typename T >
void _OneHot (const int count, const int depth, const int on_value, const T *x, T *y)
 
template<>
void OneHot< float, CPUContext > (const int count, const int depth, const int on_value, const float *x, float *y, CPUContext *ctx)
 
template<>
void OneHot< int, CPUContext > (const int count, const int depth, const int on_value, const int *x, int *y, CPUContext *ctx)
 
template<>
void OneHot< int64_t, CPUContext > (const int count, const int depth, const int on_value, const int64_t *x, int64_t *y, CPUContext *ctx)
 
template<typename T >
void _ConstPad (const int nthreads, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const int *l_pads, const T value, const T *x, T *y)
 
template<typename T >
void _ReflectPad (const int nthreads, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const int *l_pads, const T *x, T *y)
 
template<typename T >
void _EdgePad (const int nthreads, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const int *l_pads, const T *x, T *y)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (bool)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (int)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (float16)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (float)
 
 DEFINE_CONST_PAD_KERNEL_LAUNCHER (double)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, bool)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, int8_t)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, uint8_t)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, int)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, int64_t)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, float16)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, float)
 
 DEFINE_PAD_KERNEL_LAUNCHER (ReflectPad, double)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, bool)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, int8_t)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, uint8_t)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, int)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, int64_t)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, float16)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, float)
 
 DEFINE_PAD_KERNEL_LAUNCHER (EdgePad, double)
 
template<typename T >
void _ColwiseReduceSum (const int rows, const int cols, const float scale, const T *x, T *y)
 
template<typename T >
void _RowwiseReduceSum (const int rows, const int cols, const float scale, const T *x, T *y)
 
template<typename T >
void _GenericReduceSum (const int outer_dim, const int inner_dim, const int ndims, const int *x_strides, const int *y_dims, const float scale, const T *x, T *y)
 
template<typename T >
void _GenericReduceSumLauncher (const int outer_dim, const int inner_dim, const int ndims, const int *dims, const int *axes, const float scale, const T *x, T *y)
 
template<typename T >
void _ReduceSum (const int num_dims, const int *dims, const int num_axes, const int *axes, const float scale, const T *x, T *y)
 
 DEFINE_REDUCE_SUM_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_REDUCE_SUM_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_REDUCE_SUM_KERNEL_LAUNCHER (int)
 
 DEFINE_REDUCE_SUM_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_REDUCE_SUM_KERNEL_LAUNCHER (float)
 
 DEFINE_REDUCE_SUM_KERNEL_LAUNCHER (double)
 
template<>
void ReduceSum< float16, CPUContext > (const int num_dims, const int *dims, const int num_axes, const int *axes, const float scale, const float16 *x, float16 *y, CPUContext *ctx)
 
template<typename T >
void _ReduceSumGrad (const int nthreads, const int ndims, const int *x_dims, const int *y_dims, const int *y_strides, const float scale, const T *dy, T *dx)
 
 DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER (int)
 
 DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER (float)
 
 DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER (double)
 
template<>
void ReduceSumGrad< float16, CPUContext > (const int count, const int ndims, const int *x_dims, const int *y_dims, const int *y_strides, const float scale, const float16 *dy, float16 *dx, CPUContext *ctx)
 
template<typename T >
void _Repeat (const int outer_dim, const int inner_dim, const int axis_dim, const int repeats, const T *x, T *y, CPUContext *ctx)
 
template<typename T >
void _RepeatGrad (const int outer_dim, const int inner_dim, const int axis_dim, const int repeats, const T *dy, T *dx, CPUContext *ctx)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, bool)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, int8_t)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, uint8_t)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, int)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, int64_t)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, float16)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, float)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (Repeat, double)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (RepeatGrad, int8_t)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (RepeatGrad, uint8_t)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (RepeatGrad, int)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (RepeatGrad, int64_t)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (RepeatGrad, float16)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (RepeatGrad, float)
 
 DEFINE_REPEAT_KERNEL_LAUNCHER (RepeatGrad, double)
 
template<typename T >
void _Slice (const int outer_dim, const int inner_dim, const int axis_dim, const int slice_dim, const int slice_ofs, const T *x, T *y, CPUContext *ctx)
 
template<typename T >
void _SliceGrad (const int outer_dim, const int inner_dim, const int axis_dim, const int slice_dim, const int slice_ofs, const T *dy, T *dx, CPUContext *ctx)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, bool)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, int8_t)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, uint8_t)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, int)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, int64_t)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, float16)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, float)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (Slice, double)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, bool)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, int8_t)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, uint8_t)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, int)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, int64_t)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, float16)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, float)
 
 DEFINE_SLICE_KERNEL_LAUNCHER (SliceGrad, double)
 
template<typename T >
void _Tile (const int nthreads, const int ndims, const int *x_dims, const int *x_strides, const int *y_dims, const T *x, T *y)
 
template<typename T >
void _TileGrad (const int rows, const int cols, const int multiple, const T *dy, T *dx, CPUContext *ctx)
 
 DEFINE_TILE_KERNEL_LAUNCHER (bool)
 
 DEFINE_TILE_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_TILE_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_TILE_KERNEL_LAUNCHER (int)
 
 DEFINE_TILE_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_TILE_KERNEL_LAUNCHER (float16)
 
 DEFINE_TILE_KERNEL_LAUNCHER (float)
 
 DEFINE_TILE_KERNEL_LAUNCHER (double)
 
 DEFINE_TILE_GRAD_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_TILE_GRAD_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_TILE_GRAD_KERNEL_LAUNCHER (int)
 
 DEFINE_TILE_GRAD_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_TILE_GRAD_KERNEL_LAUNCHER (float16)
 
 DEFINE_TILE_GRAD_KERNEL_LAUNCHER (float)
 
 DEFINE_TILE_GRAD_KERNEL_LAUNCHER (double)
 
template<typename T >
void _Transpose (const int nthreads, const int ndims, const int *x_strides, const int *y_dims, const T *x, T *y)
 
template<typename T >
void _TransposeGrad (const int nthreads, const int ndims, const int *x_strides, const int *y_dims, const T *dy, T *dx)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, bool)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, int8_t)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, uint8_t)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, int)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, int64_t)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, float16)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, float)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (Transpose, double)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, bool)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, int8_t)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, uint8_t)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, int)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, int64_t)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, float16)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, float)
 
 DEFINE_TRANSPOSE_KERNEL_LAUNCHER (TransposeGrad, double)
 
template<typename T >
void _Where (const int count, const uint8_t *mask, const T *a, const T *b, T *y)
 
template<typename T >
void _WhereGrad (const int count, const uint8_t *mask, const T *dy, T *da, T *db)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (bool)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (int)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (float16)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (float)
 
 DEFINE_WHERE_KERNEL_LAUNCHER (double)
 
 DEFINE_WHERE_GRAD_KERNEL_LAUNCHER (bool)
 
 DEFINE_WHERE_GRAD_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_WHERE_GRAD_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_WHERE_GRAD_KERNEL_LAUNCHER (int)
 
 DEFINE_WHERE_GRAD_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_WHERE_GRAD_KERNEL_LAUNCHER (float)
 
 DEFINE_WHERE_GRAD_KERNEL_LAUNCHER (double)
 
template<>
void WhereGrad< float16, CPUContext > (const int count, const uint8_t *mask, const float16 *dy, float16 *da, float16 *db, CPUContext *ctx)
 
template<typename T >
void _Assign (const int count, const int ndims, const int *x_dims, const int *y_strides, const int *starts, const T *x, T *y)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (bool)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (int)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (float16)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (float)
 
 DEFINE_ASSIGN_KERNEL_LAUNCHER (double)
 
 DEFINE_NOTZERO_KERNEL_LAUNCHER (bool)
 
 DEFINE_NOTZERO_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_NOTZERO_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_NOTZERO_KERNEL_LAUNCHER (int)
 
 DEFINE_NOTZERO_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_NOTZERO_KERNEL_LAUNCHER (float)
 
 DEFINE_NOTZERO_KERNEL_LAUNCHER (double)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (bool, Equal,==)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int8_t, Equal,==)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (uint8_t, Equal,==)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int, Equal,==)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int64_t, Equal,==)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (float, Equal,==)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (double, Equal,==)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (bool, NotEqual, !=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int8_t, NotEqual, !=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (uint8_t, NotEqual, !=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int, NotEqual, !=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int64_t, NotEqual, !=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (float, NotEqual, !=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (double, NotEqual, !=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (bool, Less,<)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int8_t, Less,<)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (uint8_t, Less,<)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int, Less,<)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int64_t, Less,<)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (float, Less,<)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (double, Less,<)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (bool, LessEqual,<=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int8_t, LessEqual,<=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (uint8_t, LessEqual,<=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int, LessEqual,<=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int64_t, LessEqual,<=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (float, LessEqual,<=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (double, LessEqual,<=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (bool, Greater, >)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int8_t, Greater, >)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (uint8_t, Greater, >)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int, Greater, >)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int64_t, Greater, >)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (float, Greater, >)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (double, Greater, >)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (bool, GreaterEqual, >=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int8_t, GreaterEqual, >=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (uint8_t, GreaterEqual, >=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int, GreaterEqual, >=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (int64_t, GreaterEqual, >=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (float, GreaterEqual, >=)
 
 DEFINE_COMPARE_KERNEL_LAUNCHER (double, GreaterEqual, >=)
 
template<>
void NotZero< float16, CPUContext > (const int count, const float16 *x, bool *y, CPUContext *ctx)
 
template<>
void Equal< float16, CPUContext > (const int count, const float16 *a, const float16 *b, bool *y, CPUContext *ctx)
 
template<>
void NotEqual< float16, CPUContext > (const int count, const float16 *a, const float16 *b, bool *y, CPUContext *ctx)
 
template<>
void Less< float16, CPUContext > (const int count, const float16 *a, const float16 *b, bool *y, CPUContext *ctx)
 
template<>
void LessEqual< float16, CPUContext > (const int count, const float16 *a, const float16 *b, bool *y, CPUContext *ctx)
 
template<>
void Greater< float16, CPUContext > (const int count, const float16 *a, const float16 *b, bool *y, CPUContext *ctx)
 
template<>
void GreaterEqual< float16, CPUContext > (const int count, const float16 *a, const float16 *b, bool *y, CPUContext *ctx)
 
template<>
void AbsGrad< float, CPUContext > (const int count, const float *dy, float *dx, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _NLLLoss (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *log_prob, const Ty *target, Tx *loss, int *flag)
 
template<>
void NLLLoss< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *log_prob, const float *target, float *loss, int *flag, CPUContext *ctx)
 
template<>
void NLLLoss< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *log_prob, const int64_t *target, float *loss, int *flag, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _NLLLossGrad (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *log_prob, const Ty *target, Tx *dx, int *flag)
 
template<>
void NLLLossGrad< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *log_prob, const float *target, float *dx, int *flag, CPUContext *ctx)
 
template<>
void NLLLossGrad< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *log_prob, const int64_t *target, float *dx, int *flag, CPUContext *ctx)
 
template<>
void SigmoidCrossEntropy< float, CPUContext > (const int count, const float *logit, const float *target, float *loss, int *flag, CPUContext *ctx)
 
template<>
void SigmoidCrossEntropyGrad< float, CPUContext > (const int count, const float *logit, const float *target, float *dlogit, int *flag, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _SigmoidFocalLoss (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const Tx *logits, const Ty *targets, Tx *losses, int *flags)
 
template<>
void SigmoidFocalLoss< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const float *logits, const float *targets, float *losses, int *flags, CPUContext *ctx)
 
template<>
void SigmoidFocalLoss< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const float *logits, const int64_t *targets, float *losses, int *flags, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _SigmoidFocalLossGrad (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const Tx *logits, const Ty *targets, Tx *dlogits, int *flags)
 
template<>
void SigmoidFocalLossGrad< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const float *logits, const float *targets, float *dlogits, int *flags, CPUContext *ctx)
 
template<>
void SigmoidFocalLossGrad< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const float *logits, const int64_t *targets, float *dlogits, int *flags, CPUContext *ctx)
 
template<>
void SmoothL1< float, CPUContext > (const int count, const float beta, const float *x, float *y, CPUContext *ctx)
 
template<>
void SmoothL1Grad< float, CPUContext > (const int count, const float beta, const float *dy, float *dx, CPUContext *ctx)
 
template<>
void SoftmaxCrossEntropy< float, CPUContext > (const int count, const float *prob, const float *targets, float *losses, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _SoftmaxFocalLoss (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const Tx *prob, const Ty *labels, Tx *losses, int *flags)
 
template<>
void SoftmaxFocalLoss< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const float *prob, const float *labels, float *losses, int *flags, CPUContext *ctx)
 
template<>
void SoftmaxFocalLoss< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const float *prob, const int64_t *labels, float *losses, int *flags, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _SoftmaxFocalLossGrad (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const Tx *prob, const Ty *labels, Tx *dx, int *flags)
 
template<>
void SoftmaxFocalLossGrad< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const float *prob, const float *labels, float *dx, int *flags, CPUContext *ctx)
 
template<>
void SoftmaxFocalLossGrad< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const float pos_alpha, const float neg_alpha, const float gamma, const int neg_id, const int nignores, const int *ignores, const float *prob, const int64_t *labels, float *dx, int *flags, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _SparseSoftmaxCrossEntropy (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *prob, const Ty *target, Tx *loss, int *flag)
 
template<>
void SparseSoftmaxCrossEntropy< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *prob, const float *target, float *loss, int *flag, CPUContext *ctx)
 
template<>
void SparseSoftmaxCrossEntropy< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *prob, const int64_t *target, float *loss, int *flag, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _SparseSoftmaxCrossEntropyGrad (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const Tx *prob, const Ty *target, Tx *dx, int *flag)
 
template<>
void SparseSoftmaxCrossEntropyGrad< float, float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *prob, const float *target, float *dx, int *flag, CPUContext *ctx)
 
template<>
void SparseSoftmaxCrossEntropyGrad< float, int64_t, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const int nignores, const int *ignore, const float *prob, const int64_t *target, float *dx, int *flag, CPUContext *ctx)
 
template<typename Ta , typename Tb >
void _TypeA2B (const int count, const Ta *a, Tb *b)
 
 DEFINE_TYPE_A_TO_B (float16, float)
 
 DEFINE_TYPE_A_TO_B (float, float16)
 
 DEFINE_TYPE_A_TO_B (float16, float16)
 
 DEFINE_TYPE_A_TO_ALL (bool)
 
 DEFINE_TYPE_FP16_DISABLED (bool)
 
 DEFINE_TYPE_A_TO_ALL (uint8_t)
 
 DEFINE_TYPE_FP16_DISABLED (uint8_t)
 
 DEFINE_TYPE_A_TO_ALL (int8_t)
 
 DEFINE_TYPE_FP16_DISABLED (int8_t)
 
 DEFINE_TYPE_A_TO_ALL (int)
 
 DEFINE_TYPE_FP16_DISABLED (int)
 
 DEFINE_TYPE_A_TO_ALL (int64_t)
 
 DEFINE_TYPE_FP16_DISABLED (int64_t)
 
 DEFINE_TYPE_A_TO_ALL (float)
 
 DEFINE_TYPE_A_TO_ALL (double)
 
 DEFINE_TYPE_FP16_DISABLED (double)
 
template<typename T >
void _GradientTwoSum (const int count, const T *dy1, const T *dy2, T *dx)
 
 DEFINE_GRAD_SUM2_KERNEL_LAUNCHER (int8_t)
 
 DEFINE_GRAD_SUM2_KERNEL_LAUNCHER (uint8_t)
 
 DEFINE_GRAD_SUM2_KERNEL_LAUNCHER (int)
 
 DEFINE_GRAD_SUM2_KERNEL_LAUNCHER (int64_t)
 
 DEFINE_GRAD_SUM2_KERNEL_LAUNCHER (float)
 
 DEFINE_GRAD_SUM2_KERNEL_LAUNCHER (double)
 
template<>
void GradientTwoSum< float16, CPUContext > (const int count, const float16 *dy1, const float16 *dy2, float16 *dx, CPUContext *ctx)
 
template<typename Tx , typename Ty >
void _ImageDataNCHW (const int N, const int C, const int H, const int W, const float *mean, const float *std, const Tx *x, Ty *y)
 
template<typename Tx , typename Ty >
void _ImageDataNHWC (const int N, const int C, const int H, const int W, const float *mean, const float *std, const Tx *x, Ty *y)
 
template<>
void ImageData< float, float, CPUContext > (const int N, const int C, const int H, const int W, const string &data_format, const float *mean, const float *std, const float *x, float *y, CPUContext *ctx)
 
template<>
void ImageData< uint8_t, float, CPUContext > (const int N, const int C, const int H, const int W, const string &data_format, const float *mean, const float *std, const uint8_t *x, float *y, CPUContext *ctx)
 
template<>
void ImageData< float, float16, CPUContext > (const int N, const int C, const int H, const int W, const string &data_format, const float *mean, const float *std, const float *x, float16 *y, CPUContext *ctx)
 
template<>
void ImageData< uint8_t, float16, CPUContext > (const int N, const int C, const int H, const int W, const string &data_format, const float *mean, const float *std, const uint8_t *x, float16 *y, CPUContext *ctx)
 
template<typename Tx , typename Tp , StorageOrder kOrder>
void _BatchNormInternalGrad (const std::array< int, 3 > &dims, const Tx *x, const Tp *mu, const Tp *rsig, const Tp *gamma, const Tx *dy, Tp *ds, Tp *db, Tp *dgamma, Tp *dbeta)
 
template<typename Tx , typename Tp , StorageOrder kOrder>
void _BatchNormTrainingGrad (const std::array< int, 3 > &dims, const Tx *x, const Tp *mu, const Tp *rsig, const Tp *gamma, const Tp *ds, const Tp *db, const Tx *dy, Tx *dx)
 
template<typename Tx , typename Tp , StorageOrder kOrder>
void _BatchNormWGrad (const std::array< int, 3 > &dims, const Tx *x, const Tp *mu, const Tp *rsig, const Tx *dy, Tp *dgamma, Tp *dbeta)
 
template<typename Tx , typename Tp , StorageOrder kOrder>
void _BatchNormInferenceGrad (const int N, const int C, const int S, const Tp *rsig, const Tp *gamma, const Tx *dy, Tx *dx)
 
 DEFINE_BACKWARD_KERNEL_LAUNCHER (float, float)
 
template<typename T >
void _GroupNormFusedParams (const int N, const int G, const int D, const T *mu, const T *rsig, const T *gamma, const T *beta, T *scale, T *bias)
 
template<typename Tx , typename Tp >
void _GroupNormForwardNCHW (const int N, const int C, const int S, const Tx *x, const Tp *scale, const Tp *bias, Tx *y)
 
template<typename Tx , typename Tp >
void _GroupNormForwardNHWC (const int N, const int C, const int S, const Tx *x, const Tp *scale, const Tp *bias, Tx *y)
 
template<typename Tx , typename Tp , StorageOrder kOrder>
void _GroupNormInternalGrad (const std::array< int, 4 > &dims, const Tx *x, const Tp *gamma, const Tx *dy, Tp *ds, Tp *db)
 
template<typename Tx , typename Tp , StorageOrder kOrder>
void _GroupNormGrad (const std::array< int, 4 > &dims, const Tx *x, const Tp *mu, const Tp *rsig, const Tp *gamma, const Tp *ds, const Tp *db, const Tx *dy, Tx *dx, Tp *dgamma, Tp *dbeta)
 
 DEFINE_FORWARD_KERNEL_LAUNCHER (float, float)
 
template<>
void GroupNormForward< float16, float, CPUContext > (const int N, const int G, const int D, const int S, const string &data_format, const float16 *x, const float *mu, const float *rsig, const float *gamma, const float *beta, float *scale, float *bias, float16 *y, CPUContext *ctx)
 
template<>
void GroupNormBackward< float16, float, CPUContext > (const int N, const int G, const int D, const int S, const string &data_format, const float16 *x, const float *mu, const float *rsig, const float *gamma, const float16 *dy, float *ds, float *db, float16 *dx, float *dgamma, float *dbeta, CPUContext *ctx)
 
template<typename T >
T _s (T x)
 
template<>
void LSTMCell< float, CPUContext > (const int N, const int C, const float *cx, float *actx, float *c, float *h, CPUContext *ctx)
 
template<>
void LSTMCellGrad< float, CPUContext > (const int N, const int C, const float *cx, const float *actx, const float *c, const float *dc, const float *dh, float *dcx, float *dx, CPUContext *ctx)
 
template<>
void AdamUpdate< float, CPUContext > (const int count, const float lr, const float beta1, const float beta2, const float eps, float *g, float *m, float *v, CPUContext *ctx)
 
template<>
void MixedPrecL2Decay< float16, CPUContext > (const int count, const float alpha, const float16 *w, float *dx, CPUContext *ctx)
 
template<>
void MixedPrecUpdate< float16, CPUContext > (const int count, const float *updates, float16 *w, CPUContext *ctx)
 
template<>
void NesterovUpdate< float, CPUContext > (const int count, const float lr, const float momentum, float *g, float *h, CPUContext *ctx)
 
template<>
void RMSPropUpdate< float, CPUContext > (const int count, const float lr, const float decay, const float eps, float *g, float *h, CPUContext *ctx)
 
template<>
void SGDUpdate< float, CPUContext > (const int count, const float lr, const float momentum, float *g, float *h, CPUContext *ctx)
 
template<>
void BiasAdd< float, CPUContext > (const int outer_dim, const int axis_dim, const int inner_dim, const string &data_format, const float *bias, const float *multiplier, float *y, CPUContext *ctx)
 
template<typename T >
void _BilinearResizeNCHW (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *x, T *y)
 
template<typename T >
void _BilinearResizeNHWC (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *x, T *y)
 
template<>
void BilinearResize< float, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const float *x, float *y, CPUContext *ctx)
 
template<typename T >
void _BilinearResizeGradNCHW (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *dy, T *dx)
 
template<typename T >
void _BilinearResizeGradNHWC (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *dy, T *dx)
 
template<>
void BilinearResizeGrad< float, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const float *dy, float *dx, CPUContext *ctx)
 
bool less (int a, int b)
 
template<typename T >
void _Im2Col2dNCHW (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const T *im, T *col)
 
template<typename T >
void _Im2Col2dNHWC (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const T *im, T *col)
 
template<>
void Im2Col2d< float, CPUContext > (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const float *im, float *col, CPUContext *ctx)
 
template<typename T >
void _Col2Im2dNCHW (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const T *col, T *im)
 
template<typename T >
void _Col2Im2dNHWC (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const T *col, T *im)
 
template<>
void Col2Im2d< float, CPUContext > (const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const float *col, float *im, CPUContext *ctx)
 
template<typename T >
void _DepthwiseConv2dNCHW (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const T *x, const T *w, T *y)
 
template<typename T >
void _DepthwiseConv2dNHWC (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const T *x, const T *w, T *y)
 
template<>
void DepthwiseConv2d< float, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const float *x, const float *w, float *y, CPUContext *ctx)
 
template<>
void DepthwiseConv2dGrad< float, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const float *dy, const float *w, float *dx, CPUContext *ctx)
 
template<>
void DepthwiseConv2dWGrad< float, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const string &data_format, const float *dy, const float *x, float *dw, CPUContext *ctx)
 
void _DropBlock2dNCHW (const int N, const int C, const int H, const int W, const int seed_h, const int seed_w, const int block_size, const uint32_t *seed, int *mask)
 
void _DropBlock2dNHWC (const int N, const int C, const int H, const int W, const int seed_h, const int seed_w, const int block_size, const uint32_t *seed, int *mask)
 
template<>
void DropBlock2d< CPUContext > (const int N, const int C, const int H, const int W, const int seed_h, const int seed_w, const int block_size, const float gamma, const string &data_format, uint32_t *seed, int *mask, CPUContext *ctx)
 
template<typename T >
void _NNResizeNCHW (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *x, T *y)
 
template<typename T >
void _NNResizeNHWC (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *x, T *y)
 
template<>
void NNResize< float, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const float *x, float *y, CPUContext *ctx)
 
template<>
void NNResize< float16, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const float16 *x, float16 *y, CPUContext *ctx)
 
template<typename T >
void _NNResizeGradNCHW (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *dy, T *dx)
 
template<typename T >
void _NNResizeGradNHWC (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const float scale_h, const float scale_w, const T *dy, T *dx)
 
template<>
void NNResizeGrad< float, CPUContext > (const int N, const int C, const int H, const int W, const int out_h, const int out_w, const string &data_format, const float *dy, float *dx, CPUContext *ctx)
 
template<typename T >
void _MaxPool2dNCHW (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *x, int *mask, T *y)
 
template<typename T >
void _MaxPool2dNHWC (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *x, int *mask, T *y)
 
template<>
void MaxPool2d< float, CPUContext > (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const float *x, int *mask, float *y, CPUContext *ctx)
 
template<typename T >
void _AvgPool2dNCHW (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *x, T *y)
 
template<typename T >
void _AvgPool2dNHWC (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *x, T *y)
 
template<>
void AvgPool2d< float, CPUContext > (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const float *x, float *y, CPUContext *ctx)
 
template<typename T >
void _MaxPool2dGradNCHW (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *dy, const int *mask, T *dx, CPUContext *ctx)
 
template<typename T >
void _MaxPool2dGradNHWC (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *dy, const int *mask, T *dx, CPUContext *ctx)
 
template<>
void MaxPool2dGrad< float, CPUContext > (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const float *dy, const int *mask, float *dx, CPUContext *ctx)
 
template<typename T >
void _AvgPool2dGradNCHW (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *dy, T *dx, CPUContext *ctx)
 
template<typename T >
void _AvgPool2dGradNHWC (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const T *dy, T *dx, CPUContext *ctx)
 
template<>
void AvgPool2dGrad< float, CPUContext > (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const string &data_format, const float *dy, float *dx, CPUContext *ctx)
 
template<typename T >
T _ROIAlignIntp (const T *X, const int H, const int W, T y, T x)
 
template<>
void ROIAlign< float, CPUContext > (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const int sampling_ratio, const float *xdata, const float *rois, float *ydata, CPUContext *ctx)
 
template<>
void ROIAlign< float16, CPUContext > (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const int sampling_ratio, const float16 *x, const float *rois, float16 *y, CPUContext *ctx)
 
template<>
void ROIAlignGrad< float, CPUContext > (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const int sampling_ratio, const float *dy, const float *rois, float *dx, CPUContext *ctx)
 
template<>
void ROIPool< float, CPUContext > (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const float *x, const float *rois, int *mask, float *y, CPUContext *ctx)
 
template<>
void ROIPool< float16, CPUContext > (const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const float16 *x, const float *rois, int *mask, float16 *y, CPUContext *ctx)
 
template<>
void ROIPoolGrad< float, CPUContext > (const int N, const int C, const int H, const int W, const int pool_h, const int pool_w, const int num_rois, const float spatial_scale, const float *dy, const float *rois, const int *mask, float *dx, CPUContext *ctx)
 

Function Documentation

◆ _ApplyMask()

template<typename Tx , typename Tm >
void dragon::kernel::_ApplyMask ( const int  count,
const float  scale,
const Tx *  x,
const Tm *  mask,
Tx *  y 
)

◆ _Arange()

template<typename T >
void dragon::kernel::_Arange ( const int  count,
const int  start,
const int  step,
T *  y 
)

◆ _ArgMax()

template<typename T >
void dragon::kernel::_ArgMax ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  top_k,
const T *  x,
int64_t *  indices,
T *  values 
)

◆ _ArgMin()

template<typename T >
void dragon::kernel::_ArgMin ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  top_k,
const T *  x,
int64_t *  indices,
T *  values 
)

◆ _Assign()

template<typename T >
void dragon::kernel::_Assign ( const int  count,
const int  ndims,
const int *  x_dims,
const int *  y_strides,
const int *  starts,
const T *  x,
T *  y 
)

◆ _AvgPool2dGradNCHW()

template<typename T >
void dragon::kernel::_AvgPool2dGradNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  dy,
T *  dx,
CPUContext *  ctx 
)

◆ _AvgPool2dGradNHWC()

template<typename T >
void dragon::kernel::_AvgPool2dGradNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  dy,
T *  dx,
CPUContext *  ctx 
)

◆ _AvgPool2dNCHW()

template<typename T >
void dragon::kernel::_AvgPool2dNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  x,
T *  y 
)

◆ _AvgPool2dNHWC()

template<typename T >
void dragon::kernel::_AvgPool2dNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  x,
T *  y 
)

◆ _BatchNormInferenceGrad()

template<typename Tx , typename Tp , StorageOrder kOrder>
void dragon::kernel::_BatchNormInferenceGrad ( const int  N,
const int  C,
const int  S,
const Tp *  rsig,
const Tp *  gamma,
const Tx *  dy,
Tx *  dx 
)

◆ _BatchNormInternalGrad()

template<typename Tx , typename Tp , StorageOrder kOrder>
void dragon::kernel::_BatchNormInternalGrad ( const std::array< int, 3 > &  dims,
const Tx *  x,
const Tp *  mu,
const Tp *  rsig,
const Tp *  gamma,
const Tx *  dy,
Tp *  ds,
Tp *  db,
Tp *  dgamma,
Tp *  dbeta 
)

◆ _BatchNormTrainingGrad()

template<typename Tx , typename Tp , StorageOrder kOrder>
void dragon::kernel::_BatchNormTrainingGrad ( const std::array< int, 3 > &  dims,
const Tx *  x,
const Tp *  mu,
const Tp *  rsig,
const Tp *  gamma,
const Tp *  ds,
const Tp *  db,
const Tx *  dy,
Tx *  dx 
)

◆ _BatchNormWGrad()

template<typename Tx , typename Tp , StorageOrder kOrder>
void dragon::kernel::_BatchNormWGrad ( const std::array< int, 3 > &  dims,
const Tx *  x,
const Tp *  mu,
const Tp *  rsig,
const Tx *  dy,
Tp *  dgamma,
Tp *  dbeta 
)

◆ _BilinearResizeGradNCHW()

template<typename T >
void dragon::kernel::_BilinearResizeGradNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  dy,
T *  dx 
)

◆ _BilinearResizeGradNHWC()

template<typename T >
void dragon::kernel::_BilinearResizeGradNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  dy,
T *  dx 
)

◆ _BilinearResizeNCHW()

template<typename T >
void dragon::kernel::_BilinearResizeNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  x,
T *  y 
)

◆ _BilinearResizeNHWC()

template<typename T >
void dragon::kernel::_BilinearResizeNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  x,
T *  y 
)

◆ _BroadcastMaximum()

template<typename T >
void dragon::kernel::_BroadcastMaximum ( const int  count,
const T *  a,
const T  b,
T *  y 
)

◆ _BroadcastMaximumGrad()

template<typename T >
void dragon::kernel::_BroadcastMaximumGrad ( const int  count,
const T *  a,
const T  b,
const T *  dy,
T *  da,
T *  db 
)

◆ _BroadcastMinimum()

template<typename T >
void dragon::kernel::_BroadcastMinimum ( const int  count,
const T *  a,
const T  b,
T *  y 
)

◆ _BroadcastMinimumGrad()

template<typename T >
void dragon::kernel::_BroadcastMinimumGrad ( const int  count,
const T *  a,
const T  b,
const T *  dy,
T *  da,
T *  db 
)

◆ _ChannelShuffle()

template<typename T >
void dragon::kernel::_ChannelShuffle ( const int  outer_dim,
const int  inner_dim,
const int  G,
const int  K,
const T *  x,
T *  y,
CPUContext *  ctx 
)

◆ _Clip()

template<typename T >
void dragon::kernel::_Clip ( const int  count,
const T  low,
const T  high,
const T *  x,
T *  y 
)

◆ _ClipGrad()

template<typename T >
void dragon::kernel::_ClipGrad ( const int  count,
const T  low,
const T  high,
const T *  x,
const T *  dy,
T *  dx 
)

◆ _Col2Im2dNCHW()

template<typename T >
void dragon::kernel::_Col2Im2dNCHW ( const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const T *  col,
T *  im 
)

◆ _Col2Im2dNHWC()

template<typename T >
void dragon::kernel::_Col2Im2dNHWC ( const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const T *  col,
T *  im 
)

◆ _ColwiseMoments()

template<typename Tx , typename Ty >
void dragon::kernel::_ColwiseMoments ( const int  rows,
const int  cols,
const Tx *  x,
Ty *  mean,
Ty *  var 
)

◆ _ColwiseReduceSum()

template<typename T >
void dragon::kernel::_ColwiseReduceSum ( const int  rows,
const int  cols,
const float  scale,
const T *  x,
T *  y 
)

◆ _Concat()

template<typename T >
void dragon::kernel::_Concat ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  cat_dim,
const int  cat_ofs,
const T *  x,
T *  y,
CPUContext *  ctx 
)

◆ _ConstPad()

template<typename T >
void dragon::kernel::_ConstPad ( const int  nthreads,
const int  ndims,
const int *  x_dims,
const int *  x_strides,
const int *  y_dims,
const int *  l_pads,
const T  value,
const T *  x,
T *  y 
)

◆ _Crop()

template<typename T >
void dragon::kernel::_Crop ( const int  count,
const int  ndims,
const int *  x_strides,
const int *  y_dims,
const int *  starts,
const T *  x,
T *  y 
)

◆ _CropGrad()

template<typename T >
void dragon::kernel::_CropGrad ( const int  count,
const int  ndims,
const int *  x_strides,
const int *  y_dims,
const int *  starts,
const T *  dy,
T *  dx 
)

◆ _DepthwiseConv2dNCHW()

template<typename T >
void dragon::kernel::_DepthwiseConv2dNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const T *  x,
const T *  w,
T *  y 
)

◆ _DepthwiseConv2dNHWC()

template<typename T >
void dragon::kernel::_DepthwiseConv2dNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const T *  x,
const T *  w,
T *  y 
)

◆ _DropBlock2dNCHW()

void dragon::kernel::_DropBlock2dNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  seed_h,
const int  seed_w,
const int  block_size,
const uint32_t *  seed,
int *  mask 
)

◆ _DropBlock2dNHWC()

void dragon::kernel::_DropBlock2dNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  seed_h,
const int  seed_w,
const int  block_size,
const uint32_t *  seed,
int *  mask 
)

◆ _EdgePad()

template<typename T >
void dragon::kernel::_EdgePad ( const int  nthreads,
const int  ndims,
const int *  x_dims,
const int *  x_strides,
const int *  y_dims,
const int *  l_pads,
const T *  x,
T *  y 
)

◆ _GenericMoments()

template<typename Tx , typename Ty >
void dragon::kernel::_GenericMoments ( const int  outer_dim,
const int  inner_dim,
const int  ndim,
const int *  x_strides,
const int *  y_dims,
const Tx *  x,
Ty *  mean,
Ty *  var 
)

◆ _GenericMomentsLauncher()

template<typename Tx , typename Ty >
void dragon::kernel::_GenericMomentsLauncher ( const int  outer_dim,
const int  inner_dim,
const int  ndim,
const int *  dims,
const int *  axes,
const Tx *  x,
Ty *  mean,
Ty *  var 
)

◆ _GenericReduceSum()

template<typename T >
void dragon::kernel::_GenericReduceSum ( const int  outer_dim,
const int  inner_dim,
const int  ndims,
const int *  x_strides,
const int *  y_dims,
const float  scale,
const T *  x,
T *  y 
)

◆ _GenericReduceSumLauncher()

template<typename T >
void dragon::kernel::_GenericReduceSumLauncher ( const int  outer_dim,
const int  inner_dim,
const int  ndims,
const int *  dims,
const int *  axes,
const float  scale,
const T *  x,
T *  y 
)

◆ _GradientTwoSum()

template<typename T >
void dragon::kernel::_GradientTwoSum ( const int  count,
const T *  dy1,
const T *  dy2,
T *  dx 
)

◆ _GroupNormForwardNCHW()

template<typename Tx , typename Tp >
void dragon::kernel::_GroupNormForwardNCHW ( const int  N,
const int  C,
const int  S,
const Tx *  x,
const Tp *  scale,
const Tp *  bias,
Tx *  y 
)

◆ _GroupNormForwardNHWC()

template<typename Tx , typename Tp >
void dragon::kernel::_GroupNormForwardNHWC ( const int  N,
const int  C,
const int  S,
const Tx *  x,
const Tp *  scale,
const Tp *  bias,
Tx *  y 
)

◆ _GroupNormFusedParams()

template<typename T >
void dragon::kernel::_GroupNormFusedParams ( const int  N,
const int  G,
const int  D,
const T *  mu,
const T *  rsig,
const T *  gamma,
const T *  beta,
T *  scale,
T *  bias 
)

◆ _GroupNormGrad()

template<typename Tx , typename Tp , StorageOrder kOrder>
void dragon::kernel::_GroupNormGrad ( const std::array< int, 4 > &  dims,
const Tx *  x,
const Tp *  mu,
const Tp *  rsig,
const Tp *  gamma,
const Tp *  ds,
const Tp *  db,
const Tx *  dy,
Tx *  dx,
Tp *  dgamma,
Tp *  dbeta 
)

◆ _GroupNormInternalGrad()

template<typename Tx , typename Tp , StorageOrder kOrder>
void dragon::kernel::_GroupNormInternalGrad ( const std::array< int, 4 > &  dims,
const Tx *  x,
const Tp *  gamma,
const Tx *  dy,
Tp *  ds,
Tp *  db 
)

◆ _Im2Col2dNCHW()

template<typename T >
void dragon::kernel::_Im2Col2dNCHW ( const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const T *  im,
T *  col 
)

◆ _Im2Col2dNHWC()

template<typename T >
void dragon::kernel::_Im2Col2dNHWC ( const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const T *  im,
T *  col 
)

◆ _ImageDataNCHW()

template<typename Tx , typename Ty >
void dragon::kernel::_ImageDataNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const float *  mean,
const float *  std,
const Tx *  x,
Ty *  y 
)

◆ _ImageDataNHWC()

template<typename Tx , typename Ty >
void dragon::kernel::_ImageDataNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const float *  mean,
const float *  std,
const Tx *  x,
Ty *  y 
)

◆ _IndexSelect()

template<typename T >
void dragon::kernel::_IndexSelect ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  num_indices,
const int64_t *  indices,
const T *  x,
T *  y,
CPUContext *  ctx 
)

◆ _IndexSelectGrad()

template<typename T >
void dragon::kernel::_IndexSelectGrad ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  num_indices,
const int64_t *  indices,
const T *  dy,
T *  dx,
CPUContext *  ctx 
)

◆ _MaskedSelectGrad()

template<typename T >
void dragon::kernel::_MaskedSelectGrad ( const int  num_indices,
const int64_t *  indices,
const T *  dy,
T *  dx 
)

◆ _Maximum()

template<typename T >
void dragon::kernel::_Maximum ( const int  count,
const T *  a,
const T *  b,
T *  y 
)

◆ _MaximumGrad()

template<typename T >
void dragon::kernel::_MaximumGrad ( const int  count,
const T *  a,
const T *  b,
const T *  dy,
T *  da,
T *  db 
)

◆ _MaxPool2dGradNCHW()

template<typename T >
void dragon::kernel::_MaxPool2dGradNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  dy,
const int *  mask,
T *  dx,
CPUContext *  ctx 
)

◆ _MaxPool2dGradNHWC()

template<typename T >
void dragon::kernel::_MaxPool2dGradNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  dy,
const int *  mask,
T *  dx,
CPUContext *  ctx 
)

◆ _MaxPool2dNCHW()

template<typename T >
void dragon::kernel::_MaxPool2dNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  x,
int *  mask,
T *  y 
)

◆ _MaxPool2dNHWC()

template<typename T >
void dragon::kernel::_MaxPool2dNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const T *  x,
int *  mask,
T *  y 
)

◆ _Minimum()

template<typename T >
void dragon::kernel::_Minimum ( const int  count,
const T *  a,
const T *  b,
T *  y 
)

◆ _MinimumGrad()

template<typename T >
void dragon::kernel::_MinimumGrad ( const int  count,
const T *  a,
const T *  b,
const T *  dy,
T *  da,
T *  db 
)

◆ _Moments()

template<typename Tx , typename Ty >
void dragon::kernel::_Moments ( const int  num_dims,
const int *  dims,
const int  num_axes,
const int *  axes,
const Tx *  x,
Ty *  mean,
Ty *  var,
CPUContext *  ctx 
)

◆ _NLLLoss()

template<typename Tx , typename Ty >
void dragon::kernel::_NLLLoss ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const int  nignores,
const int *  ignore,
const Tx *  log_prob,
const Ty *  target,
Tx *  loss,
int *  flag 
)

◆ _NLLLossGrad()

template<typename Tx , typename Ty >
void dragon::kernel::_NLLLossGrad ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const int  nignores,
const int *  ignore,
const Tx *  log_prob,
const Ty *  target,
Tx *  dx,
int *  flag 
)

◆ _NNResizeGradNCHW()

template<typename T >
void dragon::kernel::_NNResizeGradNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  dy,
T *  dx 
)

◆ _NNResizeGradNHWC()

template<typename T >
void dragon::kernel::_NNResizeGradNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  dy,
T *  dx 
)

◆ _NNResizeNCHW()

template<typename T >
void dragon::kernel::_NNResizeNCHW ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  x,
T *  y 
)

◆ _NNResizeNHWC()

template<typename T >
void dragon::kernel::_NNResizeNHWC ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const float  scale_h,
const float  scale_w,
const T *  x,
T *  y 
)

◆ _OneHot()

template<typename T >
void dragon::kernel::_OneHot ( const int  count,
const int  depth,
const int  on_value,
const T *  x,
T *  y 
)

◆ _ReduceSum()

template<typename T >
void dragon::kernel::_ReduceSum ( const int  num_dims,
const int *  dims,
const int  num_axes,
const int *  axes,
const float  scale,
const T *  x,
T *  y 
)

◆ _ReduceSumGrad()

template<typename T >
void dragon::kernel::_ReduceSumGrad ( const int  nthreads,
const int  ndims,
const int *  x_dims,
const int *  y_dims,
const int *  y_strides,
const float  scale,
const T *  dy,
T *  dx 
)

ReduceSumGrad <T = ?, Device = CPU>

◆ _ReflectPad()

template<typename T >
void dragon::kernel::_ReflectPad ( const int  nthreads,
const int  ndims,
const int *  x_dims,
const int *  x_strides,
const int *  y_dims,
const int *  l_pads,
const T *  x,
T *  y 
)

◆ _Repeat()

template<typename T >
void dragon::kernel::_Repeat ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  repeats,
const T *  x,
T *  y,
CPUContext *  ctx 
)

◆ _RepeatGrad()

template<typename T >
void dragon::kernel::_RepeatGrad ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  repeats,
const T *  dy,
T *  dx,
CPUContext *  ctx 
)

◆ _ROIAlignIntp()

template<typename T >
T dragon::kernel::_ROIAlignIntp ( const T *  X,
const int  H,
const int  W,
T  y,
T  x 
)

◆ _RowwiseMoments()

template<typename Tx , typename Ty >
void dragon::kernel::_RowwiseMoments ( const int  rows,
const int  cols,
const Tx *  x,
Ty *  mean,
Ty *  var 
)

◆ _RowwiseReduceSum()

template<typename T >
void dragon::kernel::_RowwiseReduceSum ( const int  rows,
const int  cols,
const float  scale,
const T *  x,
T *  y 
)

◆ _s()

template<typename T >
T dragon::kernel::_s ( T  x)

◆ _Sigmoid()

template<typename T >
T dragon::kernel::_Sigmoid ( T  x)

◆ _SigmoidFocalLoss()

template<typename Tx , typename Ty >
void dragon::kernel::_SigmoidFocalLoss ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float  pos_alpha,
const float  neg_alpha,
const float  gamma,
const int  neg_id,
const Tx *  logits,
const Ty *  targets,
Tx *  losses,
int *  flags 
)

◆ _SigmoidFocalLossGrad()

template<typename Tx , typename Ty >
void dragon::kernel::_SigmoidFocalLossGrad ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float  pos_alpha,
const float  neg_alpha,
const float  gamma,
const int  neg_id,
const Tx *  logits,
const Ty *  targets,
Tx *  dlogits,
int *  flags 
)

◆ _Slice()

template<typename T >
void dragon::kernel::_Slice ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  slice_dim,
const int  slice_ofs,
const T *  x,
T *  y,
CPUContext *  ctx 
)

◆ _SliceGrad()

template<typename T >
void dragon::kernel::_SliceGrad ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  slice_dim,
const int  slice_ofs,
const T *  dy,
T *  dx,
CPUContext *  ctx 
)

◆ _SoftmaxFocalLoss()

template<typename Tx , typename Ty >
void dragon::kernel::_SoftmaxFocalLoss ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float  pos_alpha,
const float  neg_alpha,
const float  gamma,
const int  neg_id,
const int  nignores,
const int *  ignores,
const Tx *  prob,
const Ty *  labels,
Tx *  losses,
int *  flags 
)

◆ _SoftmaxFocalLossGrad()

template<typename Tx , typename Ty >
void dragon::kernel::_SoftmaxFocalLossGrad ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float  pos_alpha,
const float  neg_alpha,
const float  gamma,
const int  neg_id,
const int  nignores,
const int *  ignores,
const Tx *  prob,
const Ty *  labels,
Tx *  dx,
int *  flags 
)

◆ _SoftmaxGrad()

template<typename T >
void dragon::kernel::_SoftmaxGrad ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const T *  dy,
const T *  y,
T *  dx 
)

◆ _SparseSoftmaxCrossEntropy()

template<typename Tx , typename Ty >
void dragon::kernel::_SparseSoftmaxCrossEntropy ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const int  nignores,
const int *  ignore,
const Tx *  prob,
const Ty *  target,
Tx *  loss,
int *  flag 
)

◆ _SparseSoftmaxCrossEntropyGrad()

template<typename Tx , typename Ty >
void dragon::kernel::_SparseSoftmaxCrossEntropyGrad ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const int  nignores,
const int *  ignore,
const Tx *  prob,
const Ty *  target,
Tx *  dx,
int *  flag 
)

◆ _Tile()

template<typename T >
void dragon::kernel::_Tile ( const int  nthreads,
const int  ndims,
const int *  x_dims,
const int *  x_strides,
const int *  y_dims,
const T *  x,
T *  y 
)

◆ _TileGrad()

template<typename T >
void dragon::kernel::_TileGrad ( const int  rows,
const int  cols,
const int  multiple,
const T *  dy,
T *  dx,
CPUContext *  ctx 
)

◆ _Transpose()

template<typename T >
void dragon::kernel::_Transpose ( const int  nthreads,
const int  ndims,
const int *  x_strides,
const int *  y_dims,
const T *  x,
T *  y 
)

◆ _TransposeGrad()

template<typename T >
void dragon::kernel::_TransposeGrad ( const int  nthreads,
const int  ndims,
const int *  x_strides,
const int *  y_dims,
const T *  dy,
T *  dx 
)

◆ _TypeA2B()

template<typename Ta , typename Tb >
void dragon::kernel::_TypeA2B ( const int  count,
const Ta *  a,
Tb *  b 
)

◆ _Where()

template<typename T >
void dragon::kernel::_Where ( const int  count,
const uint8_t *  mask,
const T *  a,
const T *  b,
T *  y 
)

◆ _WhereGrad()

template<typename T >
void dragon::kernel::_WhereGrad ( const int  count,
const uint8_t *  mask,
const T *  dy,
T *  da,
T *  db 
)

◆ AbsGrad()

template<typename T , class Context >
void dragon::kernel::AbsGrad ( const int  count,
const T *  dy,
T *  dx,
Context *  ctx 
)

loss.l1_loss

◆ AbsGrad< float, CPUContext >()

template<>
void dragon::kernel::AbsGrad< float, CPUContext > ( const int  count,
const float *  dy,
float *  dx,
CPUContext *  ctx 
)

◆ AdamUpdate()

template<typename T , class Context >
void dragon::kernel::AdamUpdate ( const int  count,
const float  lr,
const float  beta1,
const float  beta2,
const float  eps,
T *  g,
T *  m,
T *  v,
Context *  ctx 
)

update.adam_update

◆ AdamUpdate< float, CPUContext >()

template<>
void dragon::kernel::AdamUpdate< float, CPUContext > ( const int  count,
const float  lr,
const float  beta1,
const float  beta2,
const float  eps,
float *  g,
float *  m,
float *  v,
CPUContext *  ctx 
)

◆ Affine()

template<typename T , class Context >
void dragon::kernel::Affine ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const T *  x,
const T *  alpha,
const T *  beta,
T *  y,
Context *  ctx 
)

arithmetic.affine

◆ Affine< float, CPUContext >()

template<>
void dragon::kernel::Affine< float, CPUContext > ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float *  x,
const float *  alpha,
const float *  beta,
float *  y,
CPUContext *  ctx 
)

◆ Affine< float16, CPUContext >()

template<>
void dragon::kernel::Affine< float16, CPUContext > ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float16 *  x,
const float16 *  alpha,
const float16 *  beta,
float16 *  y,
CPUContext *  ctx 
)

◆ AffineGrad()

template<typename T , class Context >
void dragon::kernel::AffineGrad ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const T *  dy,
const T *  alpha,
T *  dx,
Context *  ctx 
)

◆ AffineGrad< float, CPUContext >()

template<>
void dragon::kernel::AffineGrad< float, CPUContext > ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float *  dy,
const float *  alpha,
float *  dx,
CPUContext *  ctx 
)

◆ AffineGrad< float16, CPUContext >()

template<>
void dragon::kernel::AffineGrad< float16, CPUContext > ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const float16 *  dy,
const float16 *  alpha,
float16 *  dx,
CPUContext *  ctx 
)

◆ ApplyMask()

template<typename Tx , typename Tm , class Context >
void dragon::kernel::ApplyMask ( const int  count,
const float  scale,
const Tx *  x,
const Tm *  mask,
Tx *  y,
Context *  ctx 
)

◆ ApplyMask< float, uint8_t, CPUContext >()

template<>
void dragon::kernel::ApplyMask< float, uint8_t, CPUContext > ( const int  count,
const float  scale,
const float *  x,
const uint8_t *  mask,
float *  y,
CPUContext *  ctx 
)

◆ ApplyMask< float16, uint8_t, CPUContext >()

template<>
void dragon::kernel::ApplyMask< float16, uint8_t, CPUContext > ( const int  count,
const float  scale,
const float16 *  x,
const uint8_t *  mask,
float16 *  y,
CPUContext *  ctx 
)

◆ Arange()

template<typename T , class Context >
void dragon::kernel::Arange ( const int  count,
const int  start,
const int  step,
T *  y,
Context *  ctx 
)

array.arange

◆ Arange< float16, CPUContext >()

template<>
void dragon::kernel::Arange< float16, CPUContext > ( const int  count,
const int  start,
const int  step,
float16 *  y,
CPUContext *  ctx 
)

◆ ArgMax()

template<typename T , class Context >
void dragon::kernel::ArgMax ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  top_k,
const T *  x,
int64_t *  indices,
T *  values,
Context *  ctx 
)

array.argreduce

◆ ArgMax< float16, CPUContext >()

template<>
void dragon::kernel::ArgMax< float16, CPUContext > ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  top_k,
const float16 *  x,
int64_t *  indices,
float16 *  values,
CPUContext *  ctx 
)

◆ ArgMin()

template<typename T , class Context >
void dragon::kernel::ArgMin ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  top_k,
const T *  x,
int64_t *  indices,
T *  values,
Context *  ctx 
)

◆ ArgMin< float16, CPUContext >()

template<>
void dragon::kernel::ArgMin< float16, CPUContext > ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  top_k,
const float16 *  x,
int64_t *  indices,
float16 *  values,
CPUContext *  ctx 
)

◆ Assign()

template<typename T , class Context >
void dragon::kernel::Assign ( const int  count,
const int  ndims,
const int *  x_dims,
const int *  y_strides,
const int *  starts,
const T *  x,
T *  y,
Context *  ctx 
)

control_flow.assign

◆ AvgPool2d()

template<typename T , class Context >
void dragon::kernel::AvgPool2d ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const string &  data_format,
const T *  x,
T *  y,
Context *  ctx 
)

◆ AvgPool2d< float, CPUContext >()

template<>
void dragon::kernel::AvgPool2d< float, CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const string &  data_format,
const float *  x,
float *  y,
CPUContext *  ctx 
)

◆ AvgPool2dGrad()

template<typename T , class Context >
void dragon::kernel::AvgPool2dGrad ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const string &  data_format,
const T *  dy,
T *  dx,
Context *  ctx 
)

◆ AvgPool2dGrad< float, CPUContext >()

template<>
void dragon::kernel::AvgPool2dGrad< float, CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  pool_h,
const int  pool_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const string &  data_format,
const float *  dy,
float *  dx,
CPUContext *  ctx 
)

◆ BatchNormBackwardInference()

template<typename Tx , typename Tp , class Context >
void dragon::kernel::BatchNormBackwardInference ( const int  N,
const int  C,
const int  S,
const string &  data_format,
const Tx *  x,
const Tp *  mu,
const Tp *  rsig,
const Tp *  gamma,
const Tx *  dy,
Tx *  dx,
Tp *  dgamma,
Tp *  dbeta,
Context *  ctx 
)

◆ BatchNormBackwardTraining()

template<typename Tx , typename Tp , class Context >
void dragon::kernel::BatchNormBackwardTraining ( const int  N,
const int  C,
const int  S,
const string &  data_format,
const Tx *  x,
const Tp *  mu,
const Tp *  rsig,
const Tp *  gamma,
const Tx *  dy,
Tp *  ds,
Tp *  db,
Tx *  dx,
Tp *  dgamma,
Tp *  dbeta,
Context *  ctx 
)

norm.batch_norm

◆ BiasAdd()

template<typename T , class Context >
void dragon::kernel::BiasAdd ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const string &  data_format,
const T *  bias,
const T *  multiplier,
T *  y,
Context *  ctx 
)

vision.bias_add

◆ BiasAdd< float, CPUContext >()

template<>
void dragon::kernel::BiasAdd< float, CPUContext > ( const int  outer_dim,
const int  axis_dim,
const int  inner_dim,
const string &  data_format,
const float *  bias,
const float *  multiplier,
float *  y,
CPUContext *  ctx 
)

◆ BilinearResize()

template<typename T , class Context >
void dragon::kernel::BilinearResize ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const string &  data_format,
const T *  x,
T *  y,
Context *  ctx 
)

vision.bilinear_resize

◆ BilinearResize< float, CPUContext >()

template<>
void dragon::kernel::BilinearResize< float, CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const string &  data_format,
const float *  x,
float *  y,
CPUContext *  ctx 
)

◆ BilinearResizeGrad()

template<typename T , class Context >
void dragon::kernel::BilinearResizeGrad ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const string &  data_format,
const T *  dy,
T *  dx,
Context *  ctx 
)

◆ BilinearResizeGrad< float, CPUContext >()

template<>
void dragon::kernel::BilinearResizeGrad< float, CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const string &  data_format,
const float *  dy,
float *  dx,
CPUContext *  ctx 
)

◆ BroadcastMaximum()

template<typename T , class Context >
void dragon::kernel::BroadcastMaximum ( const int  count,
const T *  a,
const T  b,
T *  y,
Context *  ctx 
)

◆ BroadcastMaximum< float16, CPUContext >()

template<>
void dragon::kernel::BroadcastMaximum< float16, CPUContext > ( const int  count,
const float16 *  a,
const float16  b,
float16 *  y,
CPUContext *  ctx 
)

◆ BroadcastMaximumGrad()

template<typename T , class Context >
void dragon::kernel::BroadcastMaximumGrad ( const int  count,
const T *  a,
const T  b,
const T *  dy,
T *  da,
T *  db,
Context *  ctx 
)

◆ BroadcastMaximumGrad< float16, CPUContext >()

template<>
void dragon::kernel::BroadcastMaximumGrad< float16, CPUContext > ( const int  count,
const float16 *  a,
const float16  b,
const float16 *  dy,
float16 *  da,
float16 *  db,
CPUContext *  ctx 
)

◆ BroadcastMinimum()

template<typename T , class Context >
void dragon::kernel::BroadcastMinimum ( const int  count,
const T *  a,
const T  b,
T *  y,
Context *  ctx 
)

◆ BroadcastMinimum< float16, CPUContext >()

template<>
void dragon::kernel::BroadcastMinimum< float16, CPUContext > ( const int  count,
const float16 *  a,
const float16  b,
float16 *  y,
CPUContext *  ctx 
)

◆ BroadcastMinimumGrad()

template<typename T , class Context >
void dragon::kernel::BroadcastMinimumGrad ( const int  count,
const T *  a,
const T  b,
const T *  dy,
T *  da,
T *  db,
Context *  ctx 
)

◆ BroadcastMinimumGrad< float16, CPUContext >()

template<>
void dragon::kernel::BroadcastMinimumGrad< float16, CPUContext > ( const int  count,
const float16 *  a,
const float16  b,
const float16 *  dy,
float16 *  da,
float16 *  db,
CPUContext *  ctx 
)

◆ ChannelShuffle()

template<typename T , class Context >
void dragon::kernel::ChannelShuffle ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  group,
const T *  x,
T *  y,
Context *  ctx 
)

array.channel_shuffle

◆ Clip()

template<typename T , class Context >
void dragon::kernel::Clip ( const int  count,
const float  low,
const float  high,
const T *  x,
T *  y,
Context *  ctx 
)

arithmetic.clip

◆ Clip< float16, CPUContext >()

template<>
void dragon::kernel::Clip< float16, CPUContext > ( const int  count,
const float  low,
const float  high,
const float16 *  x,
float16 *  y,
CPUContext *  ctx 
)

◆ ClipGrad()

template<typename T , class Context >
void dragon::kernel::ClipGrad ( const int  count,
const float  low,
const float  high,
const T *  x,
const T *  dy,
T *  dx,
Context *  ctx 
)

◆ ClipGrad< float16, CPUContext >()

template<>
void dragon::kernel::ClipGrad< float16, CPUContext > ( const int  count,
const float  low,
const float  high,
const float16 *  x,
const float16 *  dy,
float16 *  y,
CPUContext *  ctx 
)

◆ Col2Im2d()

template<typename T , class Context >
void dragon::kernel::Col2Im2d ( const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const T *  col,
T *  im,
Context *  ctx 
)

◆ Col2Im2d< float, CPUContext >()

template<>
void dragon::kernel::Col2Im2d< float, CPUContext > ( const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const float *  col,
float *  im,
CPUContext *  ctx 
)

◆ Concat()

template<typename T , class Context >
void dragon::kernel::Concat ( const int  outer_dim,
const int  inner_dim,
const int  axis_dim,
const int  cat_dim,
const int  cat_ofs,
const T *  x,
T *  y,
Context *  ctx 
)

array.concat

◆ ConstPad()

template<typename T , class Context >
void dragon::kernel::ConstPad ( const int  count,
const int  ndims,
const int *  x_dims,
const int *  x_strides,
const int *  y_dims,
const int *  l_pads,
const float  value,
const T *  x,
T *  y,
Context *  ctx 
)

array.pad

◆ Crop()

template<typename T , class Context >
void dragon::kernel::Crop ( const int  count,
const int  ndims,
const int *  x_strides,
const int *  y_dims,
const int *  starts,
const T *  x,
T *  y,
Context *  ctx 
)

array.crop

◆ CropGrad()

template<typename T , class Context >
void dragon::kernel::CropGrad ( const int  count,
const int  ndims,
const int *  x_strides,
const int *  y_dims,
const int *  starts,
const T *  dy,
T *  dx,
Context *  ctx 
)

◆ DEFINE_ARANGE_KERNEL_LAUNCHER() [1/6]

dragon::kernel::DEFINE_ARANGE_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_ARANGE_KERNEL_LAUNCHER() [2/6]

dragon::kernel::DEFINE_ARANGE_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_ARANGE_KERNEL_LAUNCHER() [3/6]

dragon::kernel::DEFINE_ARANGE_KERNEL_LAUNCHER ( int  )

◆ DEFINE_ARANGE_KERNEL_LAUNCHER() [4/6]

dragon::kernel::DEFINE_ARANGE_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_ARANGE_KERNEL_LAUNCHER() [5/6]

dragon::kernel::DEFINE_ARANGE_KERNEL_LAUNCHER ( float  )

◆ DEFINE_ARANGE_KERNEL_LAUNCHER() [6/6]

dragon::kernel::DEFINE_ARANGE_KERNEL_LAUNCHER ( double  )

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [1/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMax  ,
bool   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [2/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMax  ,
int8_t   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [3/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMax  ,
uint8_t   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [4/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMax  ,
int   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [5/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMax  ,
int64_t   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [6/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMax  ,
float   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [7/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMax  ,
double   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [8/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMin  ,
bool   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [9/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMin  ,
int8_t   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [10/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMin  ,
uint8_t   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [11/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMin  ,
int   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [12/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMin  ,
int64_t   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [13/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMin  ,
float   
)

◆ DEFINE_ARGREDUCE_KERNEL_LAUNCHER() [14/14]

dragon::kernel::DEFINE_ARGREDUCE_KERNEL_LAUNCHER ( ArgMin  ,
double   
)

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( int  )

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( float  )

◆ DEFINE_ASSIGN_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_ASSIGN_KERNEL_LAUNCHER ( double  )

◆ DEFINE_BACKWARD_KERNEL_LAUNCHER()

dragon::kernel::DEFINE_BACKWARD_KERNEL_LAUNCHER ( float  ,
float   
)

◆ DEFINE_CLIP_GRAD_KERNEL_LAUNCHER() [1/6]

dragon::kernel::DEFINE_CLIP_GRAD_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_CLIP_GRAD_KERNEL_LAUNCHER() [2/6]

dragon::kernel::DEFINE_CLIP_GRAD_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_CLIP_GRAD_KERNEL_LAUNCHER() [3/6]

dragon::kernel::DEFINE_CLIP_GRAD_KERNEL_LAUNCHER ( int  )

◆ DEFINE_CLIP_GRAD_KERNEL_LAUNCHER() [4/6]

dragon::kernel::DEFINE_CLIP_GRAD_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_CLIP_GRAD_KERNEL_LAUNCHER() [5/6]

dragon::kernel::DEFINE_CLIP_GRAD_KERNEL_LAUNCHER ( float  )

◆ DEFINE_CLIP_GRAD_KERNEL_LAUNCHER() [6/6]

dragon::kernel::DEFINE_CLIP_GRAD_KERNEL_LAUNCHER ( double  )

◆ DEFINE_CLIP_KERNEL_LAUNCHER() [1/6]

dragon::kernel::DEFINE_CLIP_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_CLIP_KERNEL_LAUNCHER() [2/6]

dragon::kernel::DEFINE_CLIP_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_CLIP_KERNEL_LAUNCHER() [3/6]

dragon::kernel::DEFINE_CLIP_KERNEL_LAUNCHER ( int  )

◆ DEFINE_CLIP_KERNEL_LAUNCHER() [4/6]

dragon::kernel::DEFINE_CLIP_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_CLIP_KERNEL_LAUNCHER() [5/6]

dragon::kernel::DEFINE_CLIP_KERNEL_LAUNCHER ( float  )

◆ DEFINE_CLIP_KERNEL_LAUNCHER() [6/6]

dragon::kernel::DEFINE_CLIP_KERNEL_LAUNCHER ( double  )

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [1/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( bool  ,
Equal   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [2/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int8_t  ,
Equal   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [3/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( uint8_t  ,
Equal   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [4/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int  ,
Equal   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [5/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int64_t  ,
Equal   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [6/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( float  ,
Equal   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [7/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( double  ,
Equal   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [8/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( bool  ,
NotEqual  ,
!=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [9/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int8_t  ,
NotEqual  ,
!=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [10/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( uint8_t  ,
NotEqual  ,
!=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [11/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int  ,
NotEqual  ,
!=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [12/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int64_t  ,
NotEqual  ,
!=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [13/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( float  ,
NotEqual  ,
!=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [14/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( double  ,
NotEqual  ,
!=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [15/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( bool  ,
Less   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [16/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int8_t  ,
Less   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [17/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( uint8_t  ,
Less   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [18/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int  ,
Less   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [19/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int64_t  ,
Less   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [20/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( float  ,
Less   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [21/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( double  ,
Less   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [22/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( bool  ,
LessEqual  ,
<=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [23/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int8_t  ,
LessEqual  ,
<=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [24/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( uint8_t  ,
LessEqual  ,
<=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [25/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int  ,
LessEqual  ,
<=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [26/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int64_t  ,
LessEqual  ,
<=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [27/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( float  ,
LessEqual  ,
<=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [28/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( double  ,
LessEqual  ,
<=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [29/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( bool  ,
Greater   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [30/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int8_t  ,
Greater   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [31/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( uint8_t  ,
Greater   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [32/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int  ,
Greater   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [33/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int64_t  ,
Greater   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [34/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( float  ,
Greater   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [35/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( double  ,
Greater   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [36/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( bool  ,
GreaterEqual  ,
>=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [37/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int8_t  ,
GreaterEqual  ,
>=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [38/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( uint8_t  ,
GreaterEqual  ,
>=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [39/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int  ,
GreaterEqual  ,
>=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [40/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( int64_t  ,
GreaterEqual  ,
>=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [41/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( float  ,
GreaterEqual  ,
>=   
)

◆ DEFINE_COMPARE_KERNEL_LAUNCHER() [42/42]

dragon::kernel::DEFINE_COMPARE_KERNEL_LAUNCHER ( double  ,
GreaterEqual  ,
>=   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
bool   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
int8_t   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
uint8_t   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
int   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
int64_t   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
float16   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
float   
)

◆ DEFINE_CONCAT_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_CONCAT_KERNEL_LAUNCHER ( Concat  ,
double   
)

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( int  )

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( float  )

◆ DEFINE_CONST_PAD_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_CONST_PAD_KERNEL_LAUNCHER ( double  )

◆ DEFINE_CROP_KERNEL_LAUNCHER() [1/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
bool   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [2/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
int8_t   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [3/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
uint8_t   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [4/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
int   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [5/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
int64_t   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [6/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
float16   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [7/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
float   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [8/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( Crop  ,
double   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [9/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
bool   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [10/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
int8_t   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [11/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
uint8_t   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [12/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
int   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [13/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
int64_t   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [14/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
float16   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [15/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
float   
)

◆ DEFINE_CROP_KERNEL_LAUNCHER() [16/16]

dragon::kernel::DEFINE_CROP_KERNEL_LAUNCHER ( CropGrad  ,
double   
)

◆ DEFINE_FORWARD_KERNEL_LAUNCHER()

dragon::kernel::DEFINE_FORWARD_KERNEL_LAUNCHER ( float  ,
float   
)

◆ DEFINE_GRAD_SUM2_KERNEL_LAUNCHER() [1/6]

dragon::kernel::DEFINE_GRAD_SUM2_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_GRAD_SUM2_KERNEL_LAUNCHER() [2/6]

dragon::kernel::DEFINE_GRAD_SUM2_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_GRAD_SUM2_KERNEL_LAUNCHER() [3/6]

dragon::kernel::DEFINE_GRAD_SUM2_KERNEL_LAUNCHER ( int  )

◆ DEFINE_GRAD_SUM2_KERNEL_LAUNCHER() [4/6]

dragon::kernel::DEFINE_GRAD_SUM2_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_GRAD_SUM2_KERNEL_LAUNCHER() [5/6]

dragon::kernel::DEFINE_GRAD_SUM2_KERNEL_LAUNCHER ( float  )

◆ DEFINE_GRAD_SUM2_KERNEL_LAUNCHER() [6/6]

dragon::kernel::DEFINE_GRAD_SUM2_KERNEL_LAUNCHER ( double  )

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [1/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
bool   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [2/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
int8_t   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [3/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
uint8_t   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [4/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
int   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [5/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
int64_t   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [6/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
float16   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [7/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
float   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [8/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelect  ,
double   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [9/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelectGrad  ,
int8_t   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [10/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelectGrad  ,
uint8_t   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [11/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelectGrad  ,
int   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [12/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelectGrad  ,
int64_t   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [13/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelectGrad  ,
float16   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [14/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelectGrad  ,
float   
)

◆ DEFINE_INDEX_KERNEL_LAUNCHER() [15/15]

dragon::kernel::DEFINE_INDEX_KERNEL_LAUNCHER ( IndexSelectGrad  ,
double   
)

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( int  )

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( float  )

◆ DEFINE_MASKED_GRAD_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_MASKED_GRAD_KERNEL_LAUNCHER ( double  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( int  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( float  )

◆ DEFINE_MASKED_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_MASKED_KERNEL_LAUNCHER ( double  )

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [1/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( MaximumGrad  ,
int8_t  ,
int8_t *   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [2/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( MaximumGrad  ,
uint8_t  ,
uint8_t *   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [3/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( MaximumGrad  ,
int  ,
int *   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [4/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( MaximumGrad  ,
int64_t  ,
int64_t *   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [5/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( MaximumGrad  ,
float  ,
float *   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [6/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( MaximumGrad  ,
double  ,
double *   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [7/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMaximumGrad  ,
int8_t  ,
int8_t   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [8/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMaximumGrad  ,
uint8_t  ,
uint8_t   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [9/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMaximumGrad  ,
int  ,
int   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [10/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMaximumGrad  ,
int64_t  ,
int64_t   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [11/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMaximumGrad  ,
float  ,
float   
)

◆ DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER() [12/12]

dragon::kernel::DEFINE_MAXIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMaximumGrad  ,
double  ,
double   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [1/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( Maximum  ,
int8_t  ,
int8_t *   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [2/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( Maximum  ,
uint8_t  ,
uint8_t *   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [3/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( Maximum  ,
int  ,
int *   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [4/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( Maximum  ,
int64_t  ,
int64_t *   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [5/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( Maximum  ,
float  ,
float *   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [6/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( Maximum  ,
double  ,
double *   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [7/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( BroadcastMaximum  ,
int8_t  ,
int8_t   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [8/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( BroadcastMaximum  ,
uint8_t  ,
uint8_t   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [9/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( BroadcastMaximum  ,
int  ,
int   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [10/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( BroadcastMaximum  ,
int64_t  ,
int64_t   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [11/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( BroadcastMaximum  ,
float  ,
float   
)

◆ DEFINE_MAXIMUM_KERNEL_LAUNCHER() [12/12]

dragon::kernel::DEFINE_MAXIMUM_KERNEL_LAUNCHER ( BroadcastMaximum  ,
double  ,
double   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [1/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( MinimumGrad  ,
int8_t  ,
int8_t *   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [2/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( MinimumGrad  ,
uint8_t  ,
uint8_t *   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [3/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( MinimumGrad  ,
int  ,
int *   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [4/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( MinimumGrad  ,
int64_t  ,
int64_t *   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [5/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( MinimumGrad  ,
float  ,
float *   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [6/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( MinimumGrad  ,
double  ,
double *   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [7/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMinimumGrad  ,
int8_t  ,
int8_t   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [8/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMinimumGrad  ,
uint8_t  ,
uint8_t   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [9/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMinimumGrad  ,
int  ,
int   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [10/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMinimumGrad  ,
int64_t  ,
int64_t   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [11/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMinimumGrad  ,
float  ,
float   
)

◆ DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER() [12/12]

dragon::kernel::DEFINE_MINIMUM_GRAD_KERNEL_LAUNCHER ( BroadcastMinimumGrad  ,
double  ,
double   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [1/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( Minimum  ,
int8_t  ,
int8_t *   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [2/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( Minimum  ,
uint8_t  ,
uint8_t *   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [3/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( Minimum  ,
int  ,
int *   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [4/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( Minimum  ,
int64_t  ,
int64_t *   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [5/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( Minimum  ,
float  ,
float *   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [6/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( Minimum  ,
double  ,
double *   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [7/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( BroadcastMinimum  ,
int8_t  ,
int8_t   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [8/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( BroadcastMinimum  ,
uint8_t  ,
uint8_t   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [9/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( BroadcastMinimum  ,
int  ,
int   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [10/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( BroadcastMinimum  ,
int64_t  ,
int64_t   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [11/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( BroadcastMinimum  ,
float  ,
float   
)

◆ DEFINE_MINIMUM_KERNEL_LAUNCHER() [12/12]

dragon::kernel::DEFINE_MINIMUM_KERNEL_LAUNCHER ( BroadcastMinimum  ,
double  ,
double   
)

◆ DEFINE_MOMENTS_KERNEL_LAUNCHER() [1/6]

dragon::kernel::DEFINE_MOMENTS_KERNEL_LAUNCHER ( int8_t  ,
float   
)

◆ DEFINE_MOMENTS_KERNEL_LAUNCHER() [2/6]

dragon::kernel::DEFINE_MOMENTS_KERNEL_LAUNCHER ( uint8_t  ,
float   
)

◆ DEFINE_MOMENTS_KERNEL_LAUNCHER() [3/6]

dragon::kernel::DEFINE_MOMENTS_KERNEL_LAUNCHER ( int  ,
float   
)

◆ DEFINE_MOMENTS_KERNEL_LAUNCHER() [4/6]

dragon::kernel::DEFINE_MOMENTS_KERNEL_LAUNCHER ( int64_t  ,
float   
)

◆ DEFINE_MOMENTS_KERNEL_LAUNCHER() [5/6]

dragon::kernel::DEFINE_MOMENTS_KERNEL_LAUNCHER ( float  ,
float   
)

◆ DEFINE_MOMENTS_KERNEL_LAUNCHER() [6/6]

dragon::kernel::DEFINE_MOMENTS_KERNEL_LAUNCHER ( double  ,
double   
)

◆ DEFINE_NOTZERO_KERNEL_LAUNCHER() [1/7]

dragon::kernel::DEFINE_NOTZERO_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_NOTZERO_KERNEL_LAUNCHER() [2/7]

dragon::kernel::DEFINE_NOTZERO_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_NOTZERO_KERNEL_LAUNCHER() [3/7]

dragon::kernel::DEFINE_NOTZERO_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_NOTZERO_KERNEL_LAUNCHER() [4/7]

dragon::kernel::DEFINE_NOTZERO_KERNEL_LAUNCHER ( int  )

◆ DEFINE_NOTZERO_KERNEL_LAUNCHER() [5/7]

dragon::kernel::DEFINE_NOTZERO_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_NOTZERO_KERNEL_LAUNCHER() [6/7]

dragon::kernel::DEFINE_NOTZERO_KERNEL_LAUNCHER ( float  )

◆ DEFINE_NOTZERO_KERNEL_LAUNCHER() [7/7]

dragon::kernel::DEFINE_NOTZERO_KERNEL_LAUNCHER ( double  )

◆ DEFINE_PAD_KERNEL_LAUNCHER() [1/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
bool   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [2/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
int8_t   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [3/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
uint8_t   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [4/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
int   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [5/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
int64_t   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [6/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
float16   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [7/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
float   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [8/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( ReflectPad  ,
double   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [9/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
bool   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [10/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
int8_t   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [11/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
uint8_t   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [12/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
int   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [13/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
int64_t   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [14/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
float16   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [15/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
float   
)

◆ DEFINE_PAD_KERNEL_LAUNCHER() [16/16]

dragon::kernel::DEFINE_PAD_KERNEL_LAUNCHER ( EdgePad  ,
double   
)

◆ DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER() [1/6]

dragon::kernel::DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER() [2/6]

dragon::kernel::DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER() [3/6]

dragon::kernel::DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER ( int  )

◆ DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER() [4/6]

dragon::kernel::DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER() [5/6]

dragon::kernel::DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER ( float  )

◆ DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER() [6/6]

dragon::kernel::DEFINE_REDUCE_SUM_GRAD_KERNEL_LAUNCHER ( double  )

◆ DEFINE_REDUCE_SUM_KERNEL_LAUNCHER() [1/6]

dragon::kernel::DEFINE_REDUCE_SUM_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_REDUCE_SUM_KERNEL_LAUNCHER() [2/6]

dragon::kernel::DEFINE_REDUCE_SUM_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_REDUCE_SUM_KERNEL_LAUNCHER() [3/6]

dragon::kernel::DEFINE_REDUCE_SUM_KERNEL_LAUNCHER ( int  )

◆ DEFINE_REDUCE_SUM_KERNEL_LAUNCHER() [4/6]

dragon::kernel::DEFINE_REDUCE_SUM_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_REDUCE_SUM_KERNEL_LAUNCHER() [5/6]

dragon::kernel::DEFINE_REDUCE_SUM_KERNEL_LAUNCHER ( float  )

◆ DEFINE_REDUCE_SUM_KERNEL_LAUNCHER() [6/6]

dragon::kernel::DEFINE_REDUCE_SUM_KERNEL_LAUNCHER ( double  )

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [1/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
bool   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [2/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
int8_t   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [3/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
uint8_t   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [4/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
int   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [5/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
int64_t   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [6/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
float16   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [7/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
float   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [8/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( Repeat  ,
double   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [9/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( RepeatGrad  ,
int8_t   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [10/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( RepeatGrad  ,
uint8_t   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [11/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( RepeatGrad  ,
int   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [12/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( RepeatGrad  ,
int64_t   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [13/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( RepeatGrad  ,
float16   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [14/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( RepeatGrad  ,
float   
)

◆ DEFINE_REPEAT_KERNEL_LAUNCHER() [15/15]

dragon::kernel::DEFINE_REPEAT_KERNEL_LAUNCHER ( RepeatGrad  ,
double   
)

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( int  )

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( float  )

◆ DEFINE_SHUFFLE_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_SHUFFLE_KERNEL_LAUNCHER ( double  )

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [1/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
bool   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [2/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
int8_t   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [3/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
uint8_t   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [4/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
int   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [5/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
int64_t   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [6/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
float16   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [7/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
float   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [8/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( Slice  ,
double   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [9/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
bool   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [10/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
int8_t   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [11/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
uint8_t   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [12/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
int   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [13/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
int64_t   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [14/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
float16   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [15/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
float   
)

◆ DEFINE_SLICE_KERNEL_LAUNCHER() [16/16]

dragon::kernel::DEFINE_SLICE_KERNEL_LAUNCHER ( SliceGrad  ,
double   
)

◆ DEFINE_TILE_GRAD_KERNEL_LAUNCHER() [1/7]

dragon::kernel::DEFINE_TILE_GRAD_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_TILE_GRAD_KERNEL_LAUNCHER() [2/7]

dragon::kernel::DEFINE_TILE_GRAD_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_TILE_GRAD_KERNEL_LAUNCHER() [3/7]

dragon::kernel::DEFINE_TILE_GRAD_KERNEL_LAUNCHER ( int  )

◆ DEFINE_TILE_GRAD_KERNEL_LAUNCHER() [4/7]

dragon::kernel::DEFINE_TILE_GRAD_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_TILE_GRAD_KERNEL_LAUNCHER() [5/7]

dragon::kernel::DEFINE_TILE_GRAD_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_TILE_GRAD_KERNEL_LAUNCHER() [6/7]

dragon::kernel::DEFINE_TILE_GRAD_KERNEL_LAUNCHER ( float  )

◆ DEFINE_TILE_GRAD_KERNEL_LAUNCHER() [7/7]

dragon::kernel::DEFINE_TILE_GRAD_KERNEL_LAUNCHER ( double  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( int  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( float  )

◆ DEFINE_TILE_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_TILE_KERNEL_LAUNCHER ( double  )

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [1/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
bool   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [2/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
int8_t   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [3/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
uint8_t   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [4/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
int   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [5/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
int64_t   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [6/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
float16   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [7/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
float   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [8/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( Transpose  ,
double   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [9/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
bool   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [10/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
int8_t   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [11/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
uint8_t   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [12/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
int   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [13/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
int64_t   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [14/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
float16   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [15/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
float   
)

◆ DEFINE_TRANSPOSE_KERNEL_LAUNCHER() [16/16]

dragon::kernel::DEFINE_TRANSPOSE_KERNEL_LAUNCHER ( TransposeGrad  ,
double   
)

◆ DEFINE_TYPE_A_TO_ALL() [1/7]

dragon::kernel::DEFINE_TYPE_A_TO_ALL ( bool  )

◆ DEFINE_TYPE_A_TO_ALL() [2/7]

dragon::kernel::DEFINE_TYPE_A_TO_ALL ( uint8_t  )

◆ DEFINE_TYPE_A_TO_ALL() [3/7]

dragon::kernel::DEFINE_TYPE_A_TO_ALL ( int8_t  )

◆ DEFINE_TYPE_A_TO_ALL() [4/7]

dragon::kernel::DEFINE_TYPE_A_TO_ALL ( int  )

◆ DEFINE_TYPE_A_TO_ALL() [5/7]

dragon::kernel::DEFINE_TYPE_A_TO_ALL ( int64_t  )

◆ DEFINE_TYPE_A_TO_ALL() [6/7]

dragon::kernel::DEFINE_TYPE_A_TO_ALL ( float  )

◆ DEFINE_TYPE_A_TO_ALL() [7/7]

dragon::kernel::DEFINE_TYPE_A_TO_ALL ( double  )

◆ DEFINE_TYPE_A_TO_B() [1/3]

dragon::kernel::DEFINE_TYPE_A_TO_B ( float16  ,
float   
)

◆ DEFINE_TYPE_A_TO_B() [2/3]

dragon::kernel::DEFINE_TYPE_A_TO_B ( float  ,
float16   
)

◆ DEFINE_TYPE_A_TO_B() [3/3]

dragon::kernel::DEFINE_TYPE_A_TO_B ( float16  ,
float16   
)

◆ DEFINE_TYPE_FP16_DISABLED() [1/6]

dragon::kernel::DEFINE_TYPE_FP16_DISABLED ( bool  )

◆ DEFINE_TYPE_FP16_DISABLED() [2/6]

dragon::kernel::DEFINE_TYPE_FP16_DISABLED ( uint8_t  )

◆ DEFINE_TYPE_FP16_DISABLED() [3/6]

dragon::kernel::DEFINE_TYPE_FP16_DISABLED ( int8_t  )

◆ DEFINE_TYPE_FP16_DISABLED() [4/6]

dragon::kernel::DEFINE_TYPE_FP16_DISABLED ( int  )

◆ DEFINE_TYPE_FP16_DISABLED() [5/6]

dragon::kernel::DEFINE_TYPE_FP16_DISABLED ( int64_t  )

◆ DEFINE_TYPE_FP16_DISABLED() [6/6]

dragon::kernel::DEFINE_TYPE_FP16_DISABLED ( double  )

◆ DEFINE_WHERE_GRAD_KERNEL_LAUNCHER() [1/7]

dragon::kernel::DEFINE_WHERE_GRAD_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_WHERE_GRAD_KERNEL_LAUNCHER() [2/7]

dragon::kernel::DEFINE_WHERE_GRAD_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_WHERE_GRAD_KERNEL_LAUNCHER() [3/7]

dragon::kernel::DEFINE_WHERE_GRAD_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_WHERE_GRAD_KERNEL_LAUNCHER() [4/7]

dragon::kernel::DEFINE_WHERE_GRAD_KERNEL_LAUNCHER ( int  )

◆ DEFINE_WHERE_GRAD_KERNEL_LAUNCHER() [5/7]

dragon::kernel::DEFINE_WHERE_GRAD_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_WHERE_GRAD_KERNEL_LAUNCHER() [6/7]

dragon::kernel::DEFINE_WHERE_GRAD_KERNEL_LAUNCHER ( float  )

◆ DEFINE_WHERE_GRAD_KERNEL_LAUNCHER() [7/7]

dragon::kernel::DEFINE_WHERE_GRAD_KERNEL_LAUNCHER ( double  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [1/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( bool  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [2/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( int8_t  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [3/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( uint8_t  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [4/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( int  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [5/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( int64_t  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [6/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( float16  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [7/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( float  )

◆ DEFINE_WHERE_KERNEL_LAUNCHER() [8/8]

dragon::kernel::DEFINE_WHERE_KERNEL_LAUNCHER ( double  )

◆ DepthwiseConv2d()

template<typename T , class Context >
void dragon::kernel::DepthwiseConv2d ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const T *  x,
const T *  w,
T *  y,
Context *  ctx 
)

vision.depthwise_conv

◆ DepthwiseConv2d< float, CPUContext >()

template<>
void dragon::kernel::DepthwiseConv2d< float, CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const float *  x,
const float *  w,
float *  y,
CPUContext *  ctx 
)

◆ DepthwiseConv2dGrad()

template<typename T , class Context >
void dragon::kernel::DepthwiseConv2dGrad ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const T *  dy,
const T *  d,
T *  dx,
Context *  ctx 
)

◆ DepthwiseConv2dGrad< float, CPUContext >()

template<>
void dragon::kernel::DepthwiseConv2dGrad< float, CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const float *  dy,
const float *  w,
float *  dx,
CPUContext *  ctx 
)

◆ DepthwiseConv2dWGrad()

template<typename T , class Context >
void dragon::kernel::DepthwiseConv2dWGrad ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const T *  dy,
const T *  x,
T *  dw,
Context *  ctx 
)

◆ DepthwiseConv2dWGrad< float, CPUContext >()

template<>
void dragon::kernel::DepthwiseConv2dWGrad< float, CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  out_h,
const int  out_w,
const int  kernel_h,
const int  kernel_w,
const int  stride_h,
const int  stride_w,
const int  pad_h,
const int  pad_w,
const int  dilation_h,
const int  dilation_w,
const string &  data_format,
const float *  dy,
const float *  x,
float *  dw,
CPUContext *  ctx 
)

◆ DropBlock2d()

template<class Context >
void dragon::kernel::DropBlock2d ( const int  N,
const int  C,
const int  H,
const int  W,
const int  seed_h,
const int  seed_w,
const int  block_size,
const float  gamma,
const string &  data_format,
uint32_t *  seed,
int *  mask,
Context *  ctx 
)

vision.drop_block

◆ DropBlock2d< CPUContext >()

template<>
void dragon::kernel::DropBlock2d< CPUContext > ( const int  N,
const int  C,
const int  H,
const int  W,
const int  seed_h,
const int  seed_w,
const int  block_size,
const float  gamma,
const string &  data_format,
uint32_t *  seed,
int *  mask,
CPUContext *  ctx 
)

◆ Dropout()

template<typename T , class Context >
void dragon::kernel::Dropout ( const int  count,
const float  prob,
const float  scale,
const T *  x,
uint32_t *  mask32,
uint8_t *  mask8,
T *  y,
Context *  ctx 
)

activation.dropout

◆ Dropout< float, CPUContext >()

template<>
void dragon::kernel::Dropout< float, CPUContext > ( const int  count,
const float  prob,
const float  scale,
const float *  x,
uint32_t *  mask32,
uint8_t *  mask8,
float *  y,
CPUContext *  ctx 
)

◆ Dropout< float16, CPUContext >()

template<>
void dragon::kernel::Dropout< float16, CPUContext > ( const int  count,
float  prob,
float  scale,
const float16 *  x,
uint32_t *  mask32,