Feature/try to remove parallel nn #1198

Closed
52 changes: 35 additions & 17 deletions paddle/gserver/activations/ActivationFunction.cpp
@@ -22,12 +22,13 @@ limitations under the License. */
#include <type_traits>
#include "paddle/parameter/Argument.h"
#include "paddle/utils/ClassRegistrar.h"

#include "paddle/utils/Logging.h"
#include "paddle/utils/ProtoCMDArgs.h"

namespace paddle {

static ClassRegistrar<ActivationFunction> gActivationRegistrar;
static ClassRegistrar<ActivationFunction, const CMDArguments&>
gActivationRegistrar;

[Review comment from a Collaborator on the line above] "CMD" is not an acronym, so it should not be written in all caps. Also, "命令行参数" translates to "command-line arguments", not "command arguments".

/**
* @def ACTIVATION_CLASS_NAME
* @brief Macro for getting derived activation class name
Expand All @@ -39,13 +40,19 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
* @def BEGIN_DEFINE_ACTIVATION
* @brief Macro for defining a derived activation class
*/
#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME) \
#define BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME) \
class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
private: \
static const std::string name; \
\
public: \
const std::string& getName() const { return name; }

#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME) \
BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME) \
explicit ACTIVATION_CLASS_NAME(ACTIVATION_NAME)(const CMDArguments& args) \
: ActivationFunction(args) {}

/**
* @def END_DEFINE_ACTIVATION
* @brief Macro for registering a derived activation class
Expand All @@ -68,7 +75,10 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
*/
class IdentityActivation : public ActivationFunction {
public:
explicit IdentityActivation(const CMDArguments& args)
: ActivationFunction(args) {}
static const std::string name;

Error __must_check forward(Argument& act) {
(void)act;
return Error();
Expand All @@ -79,6 +89,7 @@ class IdentityActivation : public ActivationFunction {
}
const std::string& getName() const { return name; }
};

const std::string IdentityActivation::name = "";
static InitFunction __reg_activation__identity([] {
gActivationRegistrar.registerClass<IdentityActivation>("");
Expand Down Expand Up @@ -132,18 +143,18 @@ Error __must_check backward(Argument& act) {
outputG->getHeight(),
outputG->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
Matrix::resizeOrCreate(sftMaxSum_,
outputG->getHeight(),
1,
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
if (!one_ || one_->getWidth() != outputG->getWidth()) {
Matrix::resizeOrCreate(one_,
1,
outputG->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
one_->one();
}

Expand All @@ -161,12 +172,16 @@ END_DEFINE_ACTIVATION(softmax)
* @note Softmax on all frames of one sequence.
* Width of frame must be one.
*/
BEGIN_DEFINE_ACTIVATION(sequence_softmax)
BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(sequence_softmax)
private:
ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;

public:
explicit ACTIVATION_CLASS_NAME(sequence_softmax)(const CMDArguments& args)
: ActivationFunction(args),
softmax_(ACTIVATION_CLASS_NAME(softmax)(args)) {}

Error __must_check forward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Error(
Expand All @@ -178,15 +193,16 @@ Error __must_check forward(Argument& act) {
/* height= */ 1,
1,
/* trans= */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
argument_.grad = Matrix::create(nullptr,
/* height= */ 1,
1,
/* trans= */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
}

auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
auto starts = act.sequenceStartPositions->getVector(
useGPU(this->cmdArgs_, act.deviceId));
act.value->sequenceSoftmax(*act.value, *starts);
return Error();
}
Expand Down Expand Up @@ -285,12 +301,13 @@ END_DEFINE_ACTIVATION(tanh)
* f(z) = 1.7159 * tanh(2/3*z)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(stanh)
BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(stanh)
private:
real a, b;

public:
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
ACTIVATION_CLASS_NAME(stanh)
(const CMDArguments& args) : ActivationFunction(args), a(1.7159), b(2. / 3.) {}
Error __must_check forward(Argument& act) {
act.value->scaledTanh(*act.value, a, b);
return Error();
Expand Down Expand Up @@ -339,7 +356,7 @@ Error __must_check forward(Argument& act) {
act.value->getHeight(),
act.value->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));

act.in->copyFrom(*act.value);
act.value->abs2(*act.value);
Expand All @@ -365,7 +382,7 @@ Error __must_check forward(Argument& act) {
act.value->getHeight(),
act.value->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));

act.in->copyFrom(*act.value);
act.value->square2(*act.value);
Expand Down Expand Up @@ -409,7 +426,7 @@ Error __must_check forward(Argument& act) {
act.value->getHeight(),
act.value->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));

act.in->copyFrom(*act.value);
act.value->log2(*act.value);
Expand All @@ -422,8 +439,9 @@ Error __must_check backward(Argument& act) {
}
END_DEFINE_ACTIVATION(log)

ActivationFunction* ActivationFunction::create(const std::string& type) {
return gActivationRegistrar.createByType(type);
ActivationFunction* ActivationFunction::create(const std::string& type,
const CMDArguments& args) {
return gActivationRegistrar.createByType(type, args);
}

std::vector<std::string> ActivationFunction::getAllRegisteredTypes() {
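
The recurring edit in this file swaps the old global-flag helper `useGpu(deviceId)` for `useGPU(...)`, which reads the GPU decision from the command-line arguments that are now threaded through explicitly. `paddle/utils/ProtoCMDArgs.h` is not shown in this PR, so the following is only a sketch of what such a header could contain; the `use_gpu` field and the config-forwarding overload (used as `useGPU(config_, ...)` in the evaluator and layer changes below) are assumptions, not code from the diff.

```cpp
// Hypothetical sketch of paddle/utils/ProtoCMDArgs.h -- not part of this diff.
// Assumes CMDArguments carries the former gflags as proto fields
// (parallel_nn is read elsewhere in this PR; use_gpu is assumed here).
#pragma once
#include "CMDArguments.pb.h"

namespace paddle {

// Intended to mirror the old useGpu(deviceId): in parallel_nn mode the
// layer's device id decides, otherwise the global use_gpu setting does.
// A deviceId < 0 conventionally means "run on CPU".
inline bool useGPU(const CMDArguments& args, int deviceId) {
  return args.parallel_nn() ? (deviceId >= 0) : args.use_gpu();
}

// Forwarding overload for configs that embed the arguments, e.g.
// useGPU(config_, deviceId_) where config_ exposes a cmd_args() sub-message.
template <typename ConfigT>
inline bool useGPU(const ConfigT& config, int deviceId) {
  return useGPU(config.cmd_args(), deviceId);
}

}  // namespace paddle
```
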
10 changes: 8 additions & 2 deletions paddle/gserver/activations/ActivationFunction.h
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "CMDArguments.pb.h"
#include "paddle/utils/Error.h"

namespace paddle {
Expand All @@ -32,10 +33,12 @@ struct Argument;
*/
class ActivationFunction {
public:
static ActivationFunction* create(const std::string& type);
static ActivationFunction* create(const std::string& type,
const CMDArguments& args);
static std::vector<std::string> getAllRegisteredTypes();

ActivationFunction() {}
explicit ActivationFunction(const CMDArguments& cmdArgs)
: cmdArgs_(cmdArgs) {}

virtual ~ActivationFunction() {}

Expand All @@ -61,6 +64,9 @@ class ActivationFunction {
virtual Error __must_check backward(Argument& act) = 0;

virtual const std::string& getName() const = 0;

protected:
const CMDArguments& cmdArgs_;
};

} // namespace paddle
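
One consequence of this header change: activations can no longer be default-constructed, and `cmdArgs_` is a reference member, so the `CMDArguments` message passed to `create()` has to outlive the activation (in this PR it lives inside the config protos held by the owning layer). A minimal caller-side sketch under that assumption; `makeActivation` is a hypothetical helper, not part of the diff:

```cpp
// Hypothetical usage of the new factory signature (not part of this diff).
#include <memory>
#include <string>
#include "paddle/gserver/activations/ActivationFunction.h"

std::unique_ptr<paddle::ActivationFunction> makeActivation(
    const std::string& type, const paddle::CMDArguments& args) {
  // The registrar forwards `args` into the activation's constructor, and the
  // activation keeps only a reference to it (cmdArgs_), so `args` must stay
  // alive as long as the returned object -- here it is expected to refer to a
  // cmd_args() sub-message owned by a long-lived config proto.
  return std::unique_ptr<paddle::ActivationFunction>(
      paddle::ActivationFunction::create(type, args));
}
```
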
13 changes: 7 additions & 6 deletions paddle/gserver/evaluators/Evaluator.cpp
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/gserver/evaluators/Evaluator.h"
#include "paddle/utils/Stat.h"

#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/utils/ProtoCMDArgs.h"
#include "paddle/utils/Stat.h"

DECLARE_int32(trainer_id);

Expand Down Expand Up @@ -72,10 +72,11 @@ class ClassificationErrorEvaluator : public Evaluator {
CHECK_EQ((size_t)1, weight->getWidth());
}

const MatrixPtr errorMat = Matrix::create(output->getHeight(),
1,
/* trans= */ false,
useGpu(arguments[0].deviceId));
const MatrixPtr errorMat =
Matrix::create(output->getHeight(),
1,
/* trans= */ false,
useGPU(config_, arguments[0].deviceId));
errorMat->zeroMem();
if (label != nullptr) {
errorMat->classificationError(*output, *label);
2 changes: 1 addition & 1 deletion paddle/gserver/gradientmachines/GradientMachine.cpp
@@ -42,7 +42,7 @@ GradientMachine* GradientMachine::create(
if (config.type() == "multi_nn") {
/* multi submodel calculate, thread(s) will be initialized inside */
nn = new MultiNetwork("root");
} else if (FLAGS_parallel_nn) {
} else if (config.cmd_args().parallel_nn()) {
/* multi threads calculate */
nn = new ParallelNeuralNetwork();
} else {
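
The gradient-machine dispatch now reads `parallel_nn` from the model config instead of the `FLAGS_parallel_nn` global, which implies the trainer copies the parsed command-line flags into a `CMDArguments` sub-message of `ModelConfig` before calling `create()`. That wiring is not part of this diff; below is a hedged sketch of what it might look like, assuming the protobuf-generated setters for the fields read in this PR (`use_gpu` is an assumed field, as in the note after ActivationFunction.cpp, and `fillCMDArguments` is an illustrative name, not an existing Paddle function).

```cpp
// Hypothetical trainer-side wiring (not shown in this PR): copy the parsed
// command-line options into the config proto so downstream code can stop
// reading process-wide gflags such as FLAGS_parallel_nn.
#include "ModelConfig.pb.h"

namespace paddle {

void fillCMDArguments(bool parallelNN, bool useGpu, ModelConfig* config) {
  // mutable_cmd_args() is the generated mutable accessor for the cmd_args
  // message field whose getters appear throughout this diff.
  CMDArguments* args = config->mutable_cmd_args();
  args->set_parallel_nn(parallelNN);
  args->set_use_gpu(useGpu);  // `use_gpu` is an assumed field
}

}  // namespace paddle
```
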
6 changes: 3 additions & 3 deletions paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -399,7 +399,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
SetDevice gpuDevice(deviceId_);

NeuralNetwork* nn = nullptr;
if (!multiMachine->useGpu() || !FLAGS_parallel_nn) {
if (!multiMachine->useGpu() || !config.cmd_args().parallel_nn()) {
nn = NeuralNetwork::create(config);
} else {
nn = new ParallelNeuralNetwork();
Expand All @@ -422,7 +422,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
nn->init(config_, slaveParamInitCb);
gradientMachine_.reset(nn);
parameters_ = gradientMachine_->getParameters();
if (!FLAGS_parallel_nn) {
if (!config.cmd_args().parallel_nn()) {
for (auto& para : parameters_) {
para->setDevice(deviceId_);
}
Expand Down Expand Up @@ -744,7 +744,7 @@ void TrainerThread::copyInArgs() {
fullInArgs[i],
startSeq,
copySize,
FLAGS_parallel_nn ? false : multiMachine_->useGpu());
config_.cmd_args().parallel_nn() ? false : multiMachine_->useGpu());
}
}

2 changes: 1 addition & 1 deletion paddle/gserver/gradientmachines/MultiNetwork.cpp
@@ -38,7 +38,7 @@ void MultiNetwork::init(const ModelConfig& config,
// sub networks
for (int i = 1; i < config.sub_models_size(); ++i) {
std::string subModelName = config.sub_models(i).name();
if (FLAGS_parallel_nn) {
if (config.cmd_args().parallel_nn()) {
subNetworks_[i - 1] = std::unique_ptr<ParallelNeuralNetwork>(
new ParallelNeuralNetwork(subModelName, this));
} else {
2 changes: 1 addition & 1 deletion paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -208,7 +208,7 @@ void NeuralNetwork::prefetch(const std::vector<Argument>& inArgs) {
}

for (size_t i = 0; i != dataLayers_.size(); ++i) {
if (FLAGS_parallel_nn) {
if (this->config_.cmd_args().parallel_nn()) {
const_cast<Argument&>(inArgs[i]).deviceId = -1;
}
dataLayers_[i]->setData(inArgs[i]);
8 changes: 4 additions & 4 deletions paddle/gserver/layers/DataLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "DataLayer.h"

#include "paddle/utils/ProtoCMDArgs.h"
namespace paddle {

REGISTER_LAYER(data, DataLayer);
Expand All @@ -30,7 +30,7 @@ void DataLayer::copyDataToOutput(Argument& output) {
if (!output.value) {
output.value = data_.value->clone(data_.value->getHeight(),
data_.value->getWidth(),
useGpu(output.deviceId));
useGPU(config_, output.deviceId));
} else {
output.value->resize(data_.value->getHeight(), data_.value->getWidth());
}
Expand All @@ -41,11 +41,11 @@ void DataLayer::copyDataToOutput(Argument& output) {
data_.grad->getHeight(),
data_.grad->getWidth(),
/* trans= */ false,
useGpu(output.deviceId));
useGPU(config_, output.deviceId));
}
if (data_.ids) {
IVector::resizeOrCreate(
output.ids, data_.ids->getSize(), useGpu(output.deviceId));
output.ids, data_.ids->getSize(), useGPU(config_, output.deviceId));
output.ids->copyFrom(*data_.ids);
}
}
3 changes: 2 additions & 1 deletion paddle/gserver/layers/GatedRecurrentLayer.cpp
@@ -36,7 +36,8 @@ bool GatedRecurrentLayer::init(const LayerMap& layerMap,
}

reversed_ = config_.reversed();
activationGate_.reset(ActivationFunction::create(config_.active_gate_type()));
activationGate_.reset(ActivationFunction::create(config_.active_gate_type(),
config_.cmd_args()));

GruCompute::init(config_);
useBatch_ = true;
10 changes: 5 additions & 5 deletions paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "HierarchicalSigmoidLayer.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/ProtoCMDArgs.h"

namespace paddle {

Expand Down Expand Up @@ -64,12 +64,12 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
batchSize,
codeLength_,
/* trans */ false,
useGpu(deviceId_));
useGPU(config_, deviceId_));
Matrix::resizeOrCreate(preOutput_.grad,
batchSize,
codeLength_,
/* trans */ false,
useGpu(deviceId_));
useGPU(config_, deviceId_));

IVectorPtr label = getInput(*getLabelLayer()).ids;

Expand All @@ -91,8 +91,8 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
*output_.value,
-1); // scaleSum
preOutput_.value->softrelu(*preOutput_.value);
MatrixPtr sum =
Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_));
MatrixPtr sum = Matrix::create(
batchSize, 1, /* trans= */ false, useGPU(config_, deviceId_));
preOutput_.value->rowSum(*sum);
output_.value->add(*sum);
}