Dragon - C++ API
A Computation Graph Virtual Machine Based Deep Learning Framework
operator.h
#ifndef DRAGON_CORE_OPERATOR_H_
#define DRAGON_CORE_OPERATOR_H_

#include "core/registry.h"
#include "core/context.h"
#include "core/tensor.h"
#include "core/operator_gradient.h"
#include "core/operator_schema.h"
#include "utils/cast.h"

#ifdef WITH_MPI
#include <mpi.h>
#endif

namespace dragon {

class Workspace;
class OperatorBase {
 public:
    /*! \brief Default constructor */
    OperatorBase(const OperatorDef& def, Workspace* ws);

    /*! \brief Default destructor */
    virtual ~OperatorBase() {}

    /*! \brief Return the specified input tensor */
    Tensor& X(int i);

    /*! \brief Return the specified output tensor */
    Tensor* Y(int i);

    /*! \brief Return the number of inputs */
    int XSize() { return (int)inputs_.size(); }

    /*! \brief Return the number of outputs */
    int YSize() { return (int)outputs_.size(); }

    /*! \brief Modify this operator according to the given def */
    void UpdateFrom(const OperatorDef& def);

    /*! \brief Switch the internal running phase */
    void SwitchToPhase(const string& phase) { phase_ = phase; }

    /*! \brief Run this operator on the specified stream */
    virtual void Run(int stream_id = 0) { NOT_IMPLEMENTED; }

    /*! \brief Fuse this operator into the specified graph */
    virtual void Fusion(void* graph) { NOT_IMPLEMENTED; }

    /*! \brief Return the operator name */
    const string& name() const { return def_.name(); }

    /*! \brief Return the operator type */
    const string& type() const { return def_.type(); }

    /*! \brief Return the current running phase */
    const string& phase() const { return phase_; }

    /*! \brief Return the resource handle */
    const string& handle() const { return handle_; }

    /*! \brief Return the data type */
    const string& dtype() const { return dtype_; }

    /*! \brief Return the data format */
    const string& data_format() const { return data_format_; }

    /*! \brief Return the unique name in this operator */
    const string unique_name(const string& name) const {
        return "/mnt/" + handle_ + "/" + name;
    }

    /*! \brief Return the parent workspace */
    Workspace* ws() const { return ws_; }

    /*! \brief Return the value of the specified argument */
    template <typename T>
    T Arg(const string& name, const T& default_value);

    /*! \brief Return the values of the specified argument */
    template <typename T>
    vector<T> Args(const string& name);

    /*! \brief Return the argument map */
    const Map<std::string, const Argument*>& args() { return args_; }

    /*! \brief Return the specified argument */
    const Argument& arg(const string& name) { return *(args_[name]); }

    typedef Map<string, vector<OperatorBase*>> SubGraph;

    /*! \brief Return the recomputing subgraph */
    SubGraph& subgraph() { return subgraph_; }

    /*! \brief Set the given recomputing subgraph */
    void set_subgraph(SubGraph subgraph) {
        subgraph_ = subgraph;
    }

    /*! \brief Return the stored def */
    const OperatorDef& def() const { return def_; }

    /*! \brief Return the debug string of the stored def */
    string DebugString() const { return def_.DebugString(); }

    /*! \brief Return the dtype string according to the given tensor */
    string DTypeString(const Tensor&, const Set<string>&) const;

    /*! \brief Return the dtype string according to the given type */
    string DTypeString(const string&, const Set<string>&) const;

 protected:
    /*! \brief Store the parent workspace */
    Workspace* ws_;

    /*! \brief Store the def */
    OperatorDef def_;

    /*! \brief Store the recomputing subgraph */
    SubGraph subgraph_;

    /*! \brief Store the phase and handle */
    string phase_, handle_;

    /*! \brief Store the data type and format */
    string dtype_, data_format_;

    /*! \brief Store the pointers of inputs and outputs */
    vector<Tensor*> inputs_, outputs_;

    /*! \brief Store the defined arguments */
    Map<string, const Argument*> args_;
};
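/* A usage sketch (not part of this header): a derived operator typically
 * reads its arguments once in its constructor. The "axis" and "scales"
 * argument names below are hypothetical.
 *
 *     auto axis = Arg<int64_t>("axis", int64_t(0));   // scalar with default
 *     auto scales = Args<float>("scales");            // repeated values
 *     auto* buf = ws()->CreateTensor(unique_name("buf"));
 */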
template <class Context>
class Operator : public OperatorBase {
 public:
    /*! \brief Default constructor */
    Operator(const OperatorDef& def, Workspace* ws)
        : OperatorBase(def, ws),
          ctx_(def.device_option()),
          do_sync_(OperatorBase::Arg<bool>(
              "do_sync", false)),
          allow_recomp_(OperatorBase::Arg<bool>(
              "allow_recomp", false)) {
        allow_run_ = true;
        allow_run_ &= MPICheck();
        allow_run_ &= (!(YSize() == 1 &&
            Y(0)->name() == "NULL"));
    }

    /*! \brief Run this operator on the specified stream */
    void Run(int stream_id = 0) final {
        if (!allow_run_) return;
        if (allow_recomp_) PrepareResource();
        ctx()->SwitchToDevice(stream_id);
        MemorySwitch();
        RunOnDevice();
        if (do_sync_ || stream_id > 0) {
            // Sync stream(0) only at the specific time
            ctx()->FinishDeviceCompution();
        }
        if (allow_recomp_) ReleaseResource();
    }

    /*! \brief Prepare the content of inputs */
    virtual void PrepareResource();

    /*! \brief Release the ownership of inputs */
    virtual void ReleaseResource();

    /*! \brief Coordinate the context of inputs and outputs */
    virtual void MemorySwitch() {
        for (auto* e : inputs_)
            if (e->name() != "NULL")
                e->SwitchToDevice(ctx()->device_id());
        for (auto* e : outputs_)
            if (e->name() != "NULL")
                e->SwitchToDevice(ctx()->device_id());
    }

    /*! \brief Implement the detailed execution */
    virtual void RunOnDevice() = 0;

    /*! \brief Return the internal context */
    Context* ctx() { return &ctx_; }

    /*! \brief Whether this operator can be ignored */
    bool AllowRun() { return allow_run_; }

 protected:
    /*! \brief Store the internal context */
    Context ctx_;
    bool do_sync_, allow_run_, allow_recomp_;

 private:
    /*! \brief Check whether this rank is allowed to run under MPI */
    bool MPICheck() {
#ifndef WITH_MPI
        return true;
#else
        vec32_t allow_ranks =
            OperatorBase::Args<int>("mpi_ranks");
        if (allow_ranks.empty()) return true;
        int cur_rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &cur_rank);
        for (auto mpi_rank : allow_ranks)
            if (cur_rank == mpi_rank) return true;
        return false;
#endif
    }
};
/*! \brief Create an operator from the raw def */
OperatorBase* NewOperator(
    const OperatorDef& def,
    Workspace* ws);
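/* A usage sketch, assuming the standard protobuf-generated setters on
 * OperatorDef ("set_type", "add_input", "add_output"); the tensor names
 * "x"/"y" are hypothetical workspace entries, and "ws" is a Workspace*.
 *
 *     OperatorDef def;
 *     def.set_type("Relu");
 *     def.add_input("x");
 *     def.add_output("y");
 *     OperatorBase* op = NewOperator(def, ws);
 *     op->Run();
 */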
/* Macros */

#define OpArg OperatorBase::Arg
#define OpArgs OperatorBase::Args

#define SIMPLE_CTOR_DTOR(name) \
    name(const OperatorDef& def, Workspace* ws) \
        : Operator<Context>(def, ws) {} \
    virtual ~name() {}

#define USE_OPERATOR_BASE_FUNCTIONS \
    using OperatorBase::ws; \
    using OperatorBase::name; \
    using OperatorBase::type; \
    using OperatorBase::phase; \
    using OperatorBase::handle; \
    using OperatorBase::dtype; \
    using OperatorBase::data_format; \
    using OperatorBase::unique_name; \
    using OperatorBase::def; \
    using OperatorBase::X; \
    using OperatorBase::Y; \
    using OperatorBase::XSize; \
    using OperatorBase::YSize; \
    using OperatorBase::DebugString; \
    using OperatorBase::DTypeString; \
    using OperatorBase::SwitchToPhase

#define USE_OPERATOR_FUNCTIONS \
    USE_OPERATOR_BASE_FUNCTIONS; \
    using Operator<Context>::ctx; \
    using Operator<Context>::AllowRun
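/* A minimal operator skeleton (illustrative; "MyScaleOp" and its "scale"
 * argument are hypothetical) showing how Operator<Context> and the macros
 * above fit together:
 *
 *     template <class Context>
 *     class MyScaleOp final : public Operator<Context> {
 *      public:
 *         MyScaleOp(const OperatorDef& def, Workspace* ws)
 *             : Operator<Context>(def, ws),
 *               scale_(OpArg<float>("scale", 1.f)) {}
 *         USE_OPERATOR_FUNCTIONS;
 *
 *         void RunOnDevice() override;
 *
 *         template <typename T>
 *         void RunImpl();
 *
 *      protected:
 *         float scale_;
 *     };
 */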
DECLARE_REGISTRY(
    CPUOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);

DECLARE_REGISTRY(
    CUDAOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);

/* NVIDIA's Accelerated Library - CUDNN */

DECLARE_REGISTRY(
    CUDNNOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);

/* CAMBRICON's Accelerated Library - CNML */

DECLARE_REGISTRY(
    CNMLOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
/* Dispatcher for Runtime Typed-Implementation */

#define XIsType(x, dtype) \
    x.template IsType<dtype>()

template <typename... Types>
struct TensorTypes {};

template <typename Sizes, typename... Args>
struct DispatchHelper;
#define DEFINE_TENSOR_TYPES_DISPATCHER(TensorTypes, Impl) \
    template <typename T, typename... Types, typename... Args> \
    struct DispatchHelper<TensorTypes<T, Types...>, Args...> { \
        template <typename Op> \
        static void Call(Op* op, const TypeMeta& meta, string& types) { \
            if (meta.Match<T>()) return op->template Impl<T, Args...>(); \
            types += " * " + TypeToString<T>() + ",\n"; \
            return DispatchHelper<TensorTypes<Types...>, Args...> \
                ::Call(op, meta, types); \
        } \
        template <typename Op> \
        static void Call(Op* op, const Tensor& tensor) { \
            string types; return Call(op, tensor.meta(), types); \
        } \
    }; \
    template <typename... Args> \
    struct DispatchHelper<TensorTypes<>, Args...> { \
        template <typename Op> \
        static void Call(Op* op, const TypeMeta& meta, string& types) { \
            LOG(FATAL) << "Unsupported DType: " \
                       << TypeMetaToString(meta) << "\n" \
                       << "<" << op->type() << "Op>" \
                       << " supports the following dtypes: {\n" \
                       << types << "}"; \
        } \
        template <typename Op> \
        static void Call(Op* op, const Tensor& tensor) { \
            /* A string literal cannot bind to the non-const */ \
            /* reference above, so pass a named empty string. */ \
            string types; return Call(op, tensor.meta(), types); \
        } \
    };

DEFINE_TENSOR_TYPES_DISPATCHER(TensorTypes, RunImpl);
#undef DEFINE_TENSOR_TYPES_DISPATCHER
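/* A dispatch sketch (illustrative, reusing the hypothetical MyScaleOp):
 * RunOnDevice() selects a typed RunImpl<T>() from the runtime dtype.
 *
 *     template <class Context>
 *     void MyScaleOp<Context>::RunOnDevice() {
 *         // Invokes this->template RunImpl<T>() for the first matching T,
 *         // or LOG(FATAL)s with the list of supported dtypes.
 *         DispatchHelper<TensorTypes<float, double>>::Call(this, X(0));
 *     }
 */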
/* TensorFiller */

#define TENSOR_FILL_WITH_TYPE(tensor, shape, type) \
    if (tensor.count() == 0) { \
        CHECK(ws()->GetFiller(tensor.name())) \
            << "\nTensor(" << tensor.name() << ") is empty.\n" \
            << "Maybe specify a filler for it?"; \
        tensor.Reshape(shape); \
        unique_ptr<Filler<type, Context>> filler( \
            CreateFiller<type, Context>(*ws()->GetFiller(tensor.name()))); \
        filler->Fill(&tensor, ctx()); \
    } else { \
        int64_t count = 1; \
        for (int i = 0; i < shape.size(); i++) count *= shape[i]; \
        CHECK_EQ(count, tensor.count()) \
            << "\nThe model requests Tensor(" << tensor.name() << ")'s " \
            << "size to be " << count << ",\n" \
            << "but it is now " << tensor.count() << ". " \
            << "Did you feed an incorrect tensor before?"; \
        tensor.Reshape(shape); \
    }

#define TENSOR_FILL(tensor, shape) \
    if (tensor.count() == 0) { \
        CHECK(ws()->GetFiller(tensor.name())) \
            << "\nTensor(" << tensor.name() << ") is empty.\n" \
            << "Maybe specify a filler for it?"; \
        tensor.Reshape(shape); \
        unique_ptr<Filler<T, Context>> filler( \
            CreateFiller<T, Context>(*ws()->GetFiller(tensor.name()))); \
        filler->Fill(&tensor, ctx()); \
    } else { \
        int64_t count = 1; \
        for (int i = 0; i < shape.size(); i++) count *= shape[i]; \
        CHECK_EQ(count, tensor.count()) \
            << "\nThe model requests Tensor(" << tensor.name() << ")'s " \
            << "size to be " << count << ",\n" \
            << "but it is now " << tensor.count() << ". " \
            << "Did you feed an incorrect tensor before?"; \
        tensor.Reshape(shape); \
    }
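/* A filling sketch (illustrative): inside a typed RunImpl<T>(), a weight
 * input can be lazily filled on first use. "dim_in_" and "dim_out_" are
 * hypothetical members; vector<int64_t> stands in for the shape type.
 *
 *     vector<int64_t> weight_shape({dim_in_, dim_out_});
 *     TENSOR_FILL(X(1), weight_shape);  // fill once, verify size afterwards
 */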
/* Shared Multiplier */

#define DECLARE_MULTIPLIER(name, size) \
    const T* name; \
    { \
        auto* mp = ws()->CreateTensor("/share/multiplier/" \
            + TypeMetaToString(TypeMeta::Make<T>())); \
        if (size > mp->count()) { \
            mp->Reshape({ size }); \
            math::Set<T, Context>(size, cast::to<T>(1.f), \
                mp->template mutable_data<T, Context>(), ctx()); \
        } \
        name = mp->template data<T, Context>(); \
    }
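/* A multiplier sketch (illustrative; "outer_dim" is hypothetical):
 * materialize a shared vector of ones, e.g. to broadcast a bias through
 * GEMM-like math.
 *
 *     DECLARE_MULTIPLIER(ones, outer_dim);
 *     // "ones" now points to at least outer_dim elements equal to T(1),
 *     // cached in the workspace and shared across operators.
 */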
/* Dynamic Arguments */

#define DECLARE_ARG_WITH_DESC(type, arg) \
    type arg##_; \
    string arg##_desc_; \
    type arg()

#define DECLARE_ARGS_WITH_DESC(type, arg) \
    vector<type> arg##_; \
    vector<string> arg##_desc_; \
    type arg(int i)

#define GET_ARG_WITH_DESC(type, arg, default_value) \
    arg##_ = OpArg<type>(#arg, default_value); \
    arg##_desc_ = OpArg<string>(string(#arg) + "_desc", "")

#define GET_ARGS_WITH_DESC(type, arg) \
    arg##_ = OpArgs<type>(#arg); \
    arg##_desc_ = OpArgs<string>(string(#arg) + "_desc")

#define DEFINE_ARG_WITH_DESC(type, classname, arg) \
    template <class Context> \
    type classname<Context>::arg() { \
        if (arg##_desc_.empty()) return arg##_; \
        auto* arg##T = ws()->GetTensor(arg##_desc_); \
        CHECK(arg##T->template IsType<type>()) \
            << "\nThe type of " << #arg << " should be " << #type << "."; \
        CHECK_EQ(arg##T->count(), 1) \
            << "\nThe argument " << #arg << " should be a scalar."; \
        return arg##T->template data<type, CPUContext>()[0]; \
    }
#define DEFINE_ARGS_WITH_DESC(type, classname, arg) \
    template <class Context> \
    type classname<Context>::arg(int i) { \
        if (arg##_desc_.empty()) { \
            CHECK_LT(i, arg##_.size()) \
                << "\nExpected the size of " << #arg \
                << " > " << i << ". (Got " \
                << arg##_.size() << ")."; \
            return arg##_[i]; \
        } \
        CHECK_LT(i, arg##_desc_.size()) \
            << "\nExpected the size of " << #arg \
            << " > " << i << ". (Got " \
            << arg##_desc_.size() << ")."; \
        auto* arg##T = ws()->GetTensor( \
            str::replace_first(arg##_desc_[i], \
                "${HANDLE}", handle())); \
        CHECK(arg##T->template IsType<type>()) \
            << "\nThe type of " << #arg << " should be " << #type << "."; \
        CHECK_EQ(arg##T->count(), 1) \
            << "\nThe argument " << #arg << " at pos(" \
            << i << ") should be a scalar."; \
        return arg##T->template data<type, CPUContext>()[0]; \
    }

#define GET_ARGS_SIZE(arg) \
    (int)std::max(arg##_.size(), arg##_desc_.size())
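/* A dynamic-argument sketch (illustrative; "MyCropOp" and "starts" are
 * hypothetical). The values can be given statically as "starts", or at
 * run time through tensors named by "starts_desc":
 *
 *     // class body:       DECLARE_ARGS_WITH_DESC(int64_t, starts);
 *     // constructor:      GET_ARGS_WITH_DESC(int64_t, starts);
 *     // namespace scope:  DEFINE_ARGS_WITH_DESC(int64_t, MyCropOp, starts);
 *     // At run time, starts(i) returns either starts_[i] or the scalar
 *     // stored in the i-th descriptor tensor.
 */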
/* Registers */

#define INSTANTIATE_OPERATOR(name, context) \
    template class name##Op<context>;

#define INSTANTIATE_CUDNN_OPERATOR(name) \
    template class CuDNN##name##Op<CUDAContext>;

#define INSTANTIATE_CNML_OPERATOR(name) \
    template class CnML##name##Op<CNMLContext>;

#define REGISTER_CPU_OPERATOR(name, ...) \
    REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)

#define REGISTER_CUDA_OPERATOR(name, ...) \
    REGISTER_CLASS(CUDAOperatorRegistry, name, __VA_ARGS__)

#define REGISTER_CUDNN_OPERATOR(name, ...) \
    REGISTER_CLASS(CUDNNOperatorRegistry, name, __VA_ARGS__)

#define REGISTER_CNML_OPERATOR(name, ...) \
    REGISTER_CLASS(CNMLOperatorRegistry, name, __VA_ARGS__)

#define DEPLOY_CPU(name) \
    REGISTER_CPU_OPERATOR(name, name##Op<CPUContext>); \
    INSTANTIATE_OPERATOR(name, CPUContext);

#define DEPLOY_CUDA(name) \
    REGISTER_CUDA_OPERATOR(name, name##Op<CUDAContext>); \
    INSTANTIATE_OPERATOR(name, CUDAContext);

// Registers the CPUContext implementation under both registries,
// so the operator runs on the host even when dispatched for CUDA.
#define DEPLOY_CPU_CUDA(name) \
    REGISTER_CPU_OPERATOR(name, name##Op<CPUContext>); \
    REGISTER_CUDA_OPERATOR(name, name##Op<CPUContext>); \
    INSTANTIATE_OPERATOR(name, CPUContext);

#define DEPLOY_CUDNN(name) \
    REGISTER_CUDNN_OPERATOR(name, CuDNN##name##Op<CUDAContext>); \
    INSTANTIATE_CUDNN_OPERATOR(name);

#define DEPLOY_CNML(name) \
    REGISTER_CNML_OPERATOR(name, CnML##name##Op<CNMLContext>); \
    INSTANTIATE_CNML_OPERATOR(name);
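/* A registration sketch (illustrative; "MyScale" is hypothetical), placed
 * in the operator's .cc file:
 *
 *     DEPLOY_CPU(MyScale);    // registers MyScaleOp<CPUContext>
 *     DEPLOY_CUDA(MyScale);   // registers MyScaleOp<CUDAContext>
 */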
} // namespace dragon

#endif // DRAGON_CORE_OPERATOR_H_