diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index c541b72e104bf..0e494cf6d8279 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -22,12 +22,13 @@ limitations under the License. */
 #include
 #include "paddle/parameter/Argument.h"
 #include "paddle/utils/ClassRegistrar.h"
-
 #include "paddle/utils/Logging.h"
+#include "paddle/utils/ProtoCMDArgs.h"
 
 namespace paddle {
 
-static ClassRegistrar gActivationRegistrar;
+static ClassRegistrar
+    gActivationRegistrar;
 /**
  * @def ACTIVATION_CLASS_NAME
  * @brief Macro for getting derived activation class name
@@ -39,13 +40,19 @@ static ClassRegistrar gActivationRegistrar;
  * @def BEGIN_DEFINE_ACTIVATION
  * @brief Macro for defining a devried activation class
 */
-#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                              \
+#define BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME)                 \
   class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction {  \
   private:                                                                     \
     static const std::string name;                                            \
                                                                                \
   public:                                                                      \
     const std::string& getName() const { return name; }
+
+#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                              \
+  BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME)                       \
+  explicit ACTIVATION_CLASS_NAME(ACTIVATION_NAME)(const CMDArguments& args)   \
+      : ActivationFunction(args) {}
+
 /**
  * @def END_DEFINE_ACTIVATION
  * @brief Macro for registering a derived activation class
@@ -68,7 +75,10 @@ static ClassRegistrar gActivationRegistrar;
  */
 class IdentityActivation : public ActivationFunction {
 public:
+  explicit IdentityActivation(const CMDArguments& args)
+      : ActivationFunction(args) {}
   static const std::string name;
+
   Error __must_check forward(Argument& act) {
     (void)act;
     return Error();
@@ -79,6 +89,7 @@ class IdentityActivation : public ActivationFunction {
   }
   const std::string& getName() const { return name; }
 };
+
 const std::string IdentityActivation::name = "";
 static InitFunction __reg_activation__identity([] {
   gActivationRegistrar.registerClass("");
@@ -132,18 +143,18 @@ Error __must_check backward(Argument& act) {
                            outputG->getHeight(),
                            outputG->getWidth(),
                            /* trans */ false,
-                           useGpu(act.deviceId));
+                           useGPU(this->cmdArgs_, act.deviceId));
   Matrix::resizeOrCreate(sftMaxSum_,
                          outputG->getHeight(),
                          1,
                          /* trans */ false,
-                         useGpu(act.deviceId));
+                         useGPU(this->cmdArgs_, act.deviceId));
   if (!one_ || one_->getWidth() != outputG->getWidth()) {
     Matrix::resizeOrCreate(one_,
                            1,
                            outputG->getWidth(),
                            /* trans */ false,
-                           useGpu(act.deviceId));
+                           useGPU(this->cmdArgs_, act.deviceId));
     one_->one();
   }
 
@@ -161,12 +172,16 @@ END_DEFINE_ACTIVATION(softmax)
 * @note Softmax on all frames of one sequence.
 *       Width of frame must be one.
 */
-BEGIN_DEFINE_ACTIVATION(sequence_softmax)
+BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(sequence_softmax)
private:
ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;

public:
+explicit ACTIVATION_CLASS_NAME(sequence_softmax)(const CMDArguments& args)
+    : ActivationFunction(args),
+      softmax_(ACTIVATION_CLASS_NAME(softmax)(args)) {}
+
Error __must_check forward(Argument& act) {
  if (act.value->getWidth() != 1UL) {
    return Error(
@@ -178,15 +193,16 @@ Error __must_check forward(Argument& act) {
                                     /* height= */ 1,
                                     1,
                                     /* trans= */ false,
-                                    useGpu(act.deviceId));
+                                    useGPU(this->cmdArgs_, act.deviceId));
    argument_.grad = Matrix::create(nullptr,
                                    /* height= */ 1,
                                    1,
                                    /* trans= */ false,
-                                   useGpu(act.deviceId));
+                                   useGPU(this->cmdArgs_, act.deviceId));
  }
 
-  auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
+  auto starts = act.sequenceStartPositions->getVector(
+      useGPU(this->cmdArgs_, act.deviceId));
  act.value->sequenceSoftmax(*act.value, *starts);
  return Error();
 }
@@ -285,12 +301,13 @@ END_DEFINE_ACTIVATION(tanh)
 * f(z) = 1.7159 * tanh(2/3*z)
 * \f]
 */
-BEGIN_DEFINE_ACTIVATION(stanh)
+BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(stanh)
private:
real a, b;

public:
-ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
+ACTIVATION_CLASS_NAME(stanh)
+(const CMDArguments& args) : ActivationFunction(args), a(1.7159), b(2. / 3.) {}
Error __must_check forward(Argument& act) {
  act.value->scaledTanh(*act.value, a, b);
  return Error();
@@ -339,7 +356,7 @@ Error __must_check forward(Argument& act) {
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
-                        useGpu(act.deviceId));
+                        useGPU(this->cmdArgs_, act.deviceId));
 
  act.in->copyFrom(*act.value);
  act.value->abs2(*act.value);
@@ -365,7 +382,7 @@ Error __must_check forward(Argument& act) {
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
-                        useGpu(act.deviceId));
+                        useGPU(this->cmdArgs_, act.deviceId));
 
  act.in->copyFrom(*act.value);
  act.value->square2(*act.value);
@@ -409,7 +426,7 @@ Error __must_check forward(Argument& act) {
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
-                        useGpu(act.deviceId));
+                        useGPU(this->cmdArgs_, act.deviceId));
 
  act.in->copyFrom(*act.value);
  act.value->log2(*act.value);
@@ -422,8 +439,9 @@ Error __must_check backward(Argument& act) {
 }
 END_DEFINE_ACTIVATION(log)
 
-ActivationFunction* ActivationFunction::create(const std::string& type) {
-  return gActivationRegistrar.createByType(type);
+ActivationFunction* ActivationFunction::create(const std::string& type,
+                                               const CMDArguments& args) {
+  return gActivationRegistrar.createByType(type, args);
 }
 
 std::vector ActivationFunction::getAllRegisteredTypes() {
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index f208224e304a7..eff28aed33b11 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include
 #include
+#include "CMDArguments.pb.h"
 #include "paddle/utils/Error.h"
 
 namespace paddle {
@@ -32,10 +33,12 @@ struct Argument;
  */
 class ActivationFunction {
 public:
-  static ActivationFunction* create(const std::string& type);
+  static ActivationFunction* create(const std::string& type,
+                                    const CMDArguments& args);
 
   static std::vector getAllRegisteredTypes();
 
-  ActivationFunction() {}
+  explicit ActivationFunction(const CMDArguments& cmdArgs)
+      : cmdArgs_(cmdArgs) {}
 
   virtual ~ActivationFunction() {}
@@ -61,6 +64,9 @@ class ActivationFunction {
   virtual Error __must_check backward(Argument& act) = 0;
 
   virtual const std::string& getName() const = 0;
+
+protected:
+  const CMDArguments& cmdArgs_;
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/gserver/evaluators/Evaluator.cpp
index ae7508e2bb117..129e7d36befbd 100644
--- a/paddle/gserver/evaluators/Evaluator.cpp
+++ b/paddle/gserver/evaluators/Evaluator.cpp
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/gserver/evaluators/Evaluator.h"
-#include "paddle/utils/Stat.h"
-
 #include "paddle/gserver/gradientmachines/NeuralNetwork.h"
+#include "paddle/utils/ProtoCMDArgs.h"
+#include "paddle/utils/Stat.h"
 
 DECLARE_int32(trainer_id);
 
@@ -72,10 +72,11 @@ class ClassificationErrorEvaluator : public Evaluator {
     CHECK_EQ((size_t)1, weight->getWidth());
   }
 
-  const MatrixPtr errorMat = Matrix::create(output->getHeight(),
-                                            1,
-                                            /* trans= */ false,
-                                            useGpu(arguments[0].deviceId));
+  const MatrixPtr errorMat =
+      Matrix::create(output->getHeight(),
+                     1,
+                     /* trans= */ false,
+                     useGPU(config_, arguments[0].deviceId));
   errorMat->zeroMem();
   if (label != nullptr) {
     errorMat->classificationError(*output, *label);
diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp
index 36ca05b919b13..54d849fd9b5f4 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/GradientMachine.cpp
@@ -42,7 +42,7 @@ GradientMachine* GradientMachine::create(
   if (config.type() == "multi_nn") {
     /* multi submodel calculate, thread(s) will be initialized inside */
     nn = new MultiNetwork("root");
-  } else if (FLAGS_parallel_nn) {
+  } else if (config.cmd_args().parallel_nn()) {
     /* multi threads calculate */
     nn = new ParallelNeuralNetwork();
   } else {
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 80f223824d8dc..e1b17b53453a0 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -399,7 +399,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
   SetDevice gpuDevice(deviceId_);
 
   NeuralNetwork* nn = nullptr;
-  if (!multiMachine->useGpu() || !FLAGS_parallel_nn) {
+  if (!multiMachine->useGpu() || !config.cmd_args().parallel_nn()) {
     nn = NeuralNetwork::create(config);
   } else {
     nn = new ParallelNeuralNetwork();
@@ -422,7 +422,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
   nn->init(config_, slaveParamInitCb);
   gradientMachine_.reset(nn);
   parameters_ = gradientMachine_->getParameters();
-  if (!FLAGS_parallel_nn) {
+  if (!config.cmd_args().parallel_nn()) {
     for (auto& para : parameters_) {
       para->setDevice(deviceId_);
     }
@@ -744,7 +744,7 @@ void TrainerThread::copyInArgs() {
         fullInArgs[i],
         startSeq,
         copySize,
-        FLAGS_parallel_nn ? false : multiMachine_->useGpu());
+        config_.cmd_args().parallel_nn() ? false : multiMachine_->useGpu());
   }
 }
 
diff --git a/paddle/gserver/gradientmachines/MultiNetwork.cpp b/paddle/gserver/gradientmachines/MultiNetwork.cpp
index 5f52a5f3d48a4..57625c8ee0ca3 100644
--- a/paddle/gserver/gradientmachines/MultiNetwork.cpp
+++ b/paddle/gserver/gradientmachines/MultiNetwork.cpp
@@ -38,7 +38,7 @@ void MultiNetwork::init(const ModelConfig& config,
   // sub networks
   for (int i = 1; i < config.sub_models_size(); ++i) {
     std::string subModelName = config.sub_models(i).name();
-    if (FLAGS_parallel_nn) {
+    if (config.cmd_args().parallel_nn()) {
       subNetworks_[i - 1] = std::unique_ptr(
           new ParallelNeuralNetwork(subModelName, this));
     } else {
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 22051e07ee002..54d3854bee038 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -208,7 +208,7 @@ void NeuralNetwork::prefetch(const std::vector& inArgs) {
   }
 
   for (size_t i = 0; i != dataLayers_.size(); ++i) {
-    if (FLAGS_parallel_nn) {
+    if (this->config_.cmd_args().parallel_nn()) {
       const_cast(inArgs[i]).deviceId = -1;
     }
     dataLayers_[i]->setData(inArgs[i]);
diff --git a/paddle/gserver/layers/DataLayer.cpp b/paddle/gserver/layers/DataLayer.cpp
index 3551df4e172f0..1fbcc2056a483 100644
--- a/paddle/gserver/layers/DataLayer.cpp
+++ b/paddle/gserver/layers/DataLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "DataLayer.h"
-
+#include "paddle/utils/ProtoCMDArgs.h"
 namespace paddle {
 
 REGISTER_LAYER(data, DataLayer);
@@ -30,7 +30,7 @@ void DataLayer::copyDataToOutput(Argument& output) {
   if (!output.value) {
     output.value = data_.value->clone(data_.value->getHeight(),
                                       data_.value->getWidth(),
-                                      useGpu(output.deviceId));
+                                      useGPU(config_, output.deviceId));
   } else {
     output.value->resize(data_.value->getHeight(), data_.value->getWidth());
   }
@@ -41,11 +41,11 @@ void DataLayer::copyDataToOutput(Argument& output) {
                              data_.grad->getHeight(),
                              data_.grad->getWidth(),
                              /* trans= */ false,
-                             useGpu(output.deviceId));
+                             useGPU(config_, output.deviceId));
   }
   if (data_.ids) {
     IVector::resizeOrCreate(
-        output.ids, data_.ids->getSize(), useGpu(output.deviceId));
+        output.ids, data_.ids->getSize(), useGPU(config_, output.deviceId));
     output.ids->copyFrom(*data_.ids);
   }
 }
diff --git a/paddle/gserver/layers/GatedRecurrentLayer.cpp b/paddle/gserver/layers/GatedRecurrentLayer.cpp
index d3aeea921801d..cab86634afa29 100644
--- a/paddle/gserver/layers/GatedRecurrentLayer.cpp
+++ b/paddle/gserver/layers/GatedRecurrentLayer.cpp
@@ -36,7 +36,8 @@ bool GatedRecurrentLayer::init(const LayerMap& layerMap,
   }
 
   reversed_ = config_.reversed();
-  activationGate_.reset(ActivationFunction::create(config_.active_gate_type()));
+  activationGate_.reset(ActivationFunction::create(config_.active_gate_type(),
+                                                   config_.cmd_args()));
 
   GruCompute::init(config_);
   useBatch_ = true;
diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
index d62a8d846e5b3..ae7c2c70e1ed1 100644
--- a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
+++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
*/ #include "HierarchicalSigmoidLayer.h" -#include "paddle/utils/Util.h" +#include "paddle/utils/ProtoCMDArgs.h" namespace paddle { @@ -64,12 +64,12 @@ void HierarchicalSigmoidLayer::forward(PassType passType) { batchSize, codeLength_, /* trans */ false, - useGpu(deviceId_)); + useGPU(config_, deviceId_)); Matrix::resizeOrCreate(preOutput_.grad, batchSize, codeLength_, /* trans */ false, - useGpu(deviceId_)); + useGPU(config_, deviceId_)); IVectorPtr label = getInput(*getLabelLayer()).ids; @@ -91,8 +91,8 @@ void HierarchicalSigmoidLayer::forward(PassType passType) { *output_.value, -1); // scaleSum preOutput_.value->softrelu(*preOutput_.value); - MatrixPtr sum = - Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_)); + MatrixPtr sum = Matrix::create( + batchSize, 1, /* trans= */ false, useGPU(config_, deviceId_)); preOutput_.value->rowSum(*sum); output_.value->add(*sum); } diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index f76d41ad3e8a3..791cf19c1cb6e 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Util.h" - #include "paddle/math/SparseMatrix.h" #include "paddle/utils/Error.h" #include "paddle/utils/Logging.h" +#include "paddle/utils/ProtoCMDArgs.h" +#include "paddle/utils/Util.h" #include "AddtoLayer.h" #include "CRFLayer.h" @@ -45,7 +45,7 @@ Layer::Layer(const LayerConfig& config, bool useGpu) needSequenceInfo_(true) {} bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - if (useGpu_ && FLAGS_parallel_nn) { + if (useGpu_ && config_.cmd_args().parallel_nn()) { /* gpu environment is specified by device property */ deviceId_ = config_.device(); if (deviceId_ < 0) { @@ -95,7 +95,8 @@ bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* specify the activation function according to the configuration */ std::string action_type = config_.active_type(); - activation_.reset(ActivationFunction::create(action_type)); + activation_.reset( + ActivationFunction::create(action_type, config_.cmd_args())); CHECK(activation_); initNeedFlags(); @@ -130,15 +131,21 @@ void Layer::resetSpecifyOutput(Argument& output, bool isGradClean) { SetDevice device(output.deviceId); - Matrix::resizeOrCreate( - output.value, height, width, /* trans */ false, useGpu(output.deviceId)); + Matrix::resizeOrCreate(output.value, + height, + width, + /* trans */ false, + useGPU(config_, output.deviceId)); if (isValueClean) { output.value->zeroMem(); } if (passType_ != PASS_TEST && needGradient()) { - Matrix::resizeOrCreate( - output.grad, height, width, /* trans */ false, useGpu(output.deviceId)); + Matrix::resizeOrCreate(output.grad, + height, + width, + /* trans */ false, + useGPU(config_, output.deviceId)); if (isGradClean) { output.grad->zeroMem(); } @@ -234,7 +241,7 @@ void Layer::waitAndMergeOutputGrad() { output_.grad->getHeight(), output_.grad->getWidth(), /* trans */ false, - useGpu(output_.deviceId)); + useGPU(config_, output_.deviceId)); for (; i != outputOtherDevice_.size(); i++) { tmpGrad_->copyFrom(*outputOtherDevice_[i].grad, HPPL_STREAM_1); @@ -388,15 +395,17 @@ void Layer::forwardDropOut() { outV->getHeight(), outV->getWidth(), false, - useGpu(deviceId_)); + useGPU(config_, deviceId_)); dropOutMask_->randomizeUniform(); // generate a uniform random matrix 
     dropOutMask_->biggerThanScalar(config_.drop_rate());  // random mask
     outV->dotMul(*outV, *dropOutMask_);                    // dropout
   } else if (passType_ == PASS_GC) {
     // only initialize once
     if (!dropOutMask_) {
-      dropOutMask_ = Matrix::create(
-          outV->getHeight(), outV->getWidth(), false, useGpu(deviceId_));
+      dropOutMask_ = Matrix::create(outV->getHeight(),
+                                    outV->getWidth(),
+                                    false,
+                                    useGPU(config_, deviceId_));
       // We use cpu matrix to generate mask so that the mask
       // will be same for both gpu version and cpu version.
       // This will help unittest to make sure they have same result.
diff --git a/paddle/gserver/layers/LstmLayer.cpp b/paddle/gserver/layers/LstmLayer.cpp
index 01cc5fec8b970..8642d53fd2495 100644
--- a/paddle/gserver/layers/LstmLayer.cpp
+++ b/paddle/gserver/layers/LstmLayer.cpp
@@ -93,7 +93,7 @@ bool LstmLayer::init(const LayerMap &layerMap,
   reversed_ = config_.reversed();
 
   // create IdentityActivation for using drop_rate
-  activation_.reset(ActivationFunction::create(""));
+  activation_.reset(ActivationFunction::create("", config_.cmd_args()));
 
   LstmCompute::init(config_);
   useBatch_ = true;
diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp
index 88d934d782b54..69dc0bce1ae79 100644
--- a/paddle/gserver/layers/MDLstmLayer.cpp
+++ b/paddle/gserver/layers/MDLstmLayer.cpp
@@ -298,9 +298,10 @@ bool MDLstmLayer::init(const LayerMap& layerMap,
   for (int i = 0; i < numDims_; i++) {
     delays_.push_back(-1);
   }
-  activationGate_.reset(ActivationFunction::create(config_.active_gate_type()));
-  activationState_.reset(
-      ActivationFunction::create(config_.active_state_type()));
+  activationGate_.reset(ActivationFunction::create(config_.active_gate_type(),
+                                                   config_.cmd_args()));
+  activationState_.reset(ActivationFunction::create(config_.active_state_type(),
+                                                    config_.cmd_args()));
 
   return true;
 }
diff --git a/paddle/gserver/layers/MaxLayer.cpp b/paddle/gserver/layers/MaxLayer.cpp
index 23629e1986834..fcbe05ba3057d 100644
--- a/paddle/gserver/layers/MaxLayer.cpp
+++ b/paddle/gserver/layers/MaxLayer.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
*/ #include "MaxLayer.h" #include "paddle/utils/Logging.h" +#include "paddle/utils/ProtoCMDArgs.h" #include "paddle/utils/Stat.h" namespace paddle { @@ -24,7 +25,7 @@ void MaxLayer::forward(PassType passType) { SequencePoolLayer::forward(passType); IVector::resizeOrCreate( - maxIndex_, newBatchSize_ * getSize(), useGpu(deviceId_)); + maxIndex_, newBatchSize_ * getSize(), useGPU(config_, deviceId_)); maxIndex_->zeroMem(); MatrixPtr inputValue = getInputValue(0); diff --git a/paddle/gserver/tests/test_WarpCTCLayer.cpp b/paddle/gserver/tests/test_WarpCTCLayer.cpp index 55427e2f12fd7..327803121a4f3 100644 --- a/paddle/gserver/tests/test_WarpCTCLayer.cpp +++ b/paddle/gserver/tests/test_WarpCTCLayer.cpp @@ -146,7 +146,8 @@ LayerPtr createCTCLayer(string name, layerMap[layer->getName()] = layer; layer->init(layerMap, parameterMap); - ActivationFunction* softmaxActivation = ActivationFunction::create("softmax"); + ActivationFunction* softmaxActivation = + ActivationFunction::create("softmax", layerConfig.cmd_args()); softmaxActivation->forward(dataLayer->getOutput()).check(); layer->forward(PASS_GC); diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index 29d6e20dc1696..91d31982e7a2e 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -49,7 +49,7 @@ Parameter::Parameter(const ParameterConfig& config, bool useGpu, bool doInit) updateCounter_(0), updated_(false) { setID(-1); /* capture uninitialized id */ - if (useGpu_ && FLAGS_parallel_nn) { + if (useGpu_ && config_.cmd_args().parallel_nn()) { /* gpu environment is specified by device property */ deviceId_ = config_.device(); if (deviceId_ < 0) { diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index 60ac8459a12db..864734dc7444b 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -101,6 +101,15 @@ bool TrainerConfigHelper::hasTestDataConfig() const { return m->conf.has_test_data_config(); } +template +static void updateCMDArgs(T *mutableConf) { + if (mutableConf->has_cmd_args()) { + mutableConf->set_allocated_cmd_args(new paddle::CMDArguments()); + } + paddle::CMDArguments &args = *mutableConf->mutable_cmd_args(); + args.set_parallel_nn(FLAGS_parallel_nn); +} + void TrainerConfigHelper::updateConfigFromFlags() { if (!FLAGS_save_dir.empty()) { m->conf.set_save_dir(FLAGS_save_dir); @@ -111,6 +120,14 @@ void TrainerConfigHelper::updateConfigFromFlags() { if (FLAGS_start_pass != 0) { m->conf.set_start_pass(FLAGS_start_pass); } + updateCMDArgs(m->conf.mutable_model_config()); + for (auto paramConf : *m->conf.mutable_model_config()->mutable_parameters()) { + updateCMDArgs(¶mConf); + } + for (auto evaluatorConf : + *m->conf.mutable_model_config()->mutable_evaluators()) { + updateCMDArgs(&evaluatorConf); + } } void TrainerConfigHelper::disableRemoteSparseUpdater() { diff --git a/paddle/utils/ProtoCMDArgs.h b/paddle/utils/ProtoCMDArgs.h new file mode 100644 index 0000000000000..dda10da2ac118 --- /dev/null +++ b/paddle/utils/ProtoCMDArgs.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "CMDArguments.pb.h"
+#include "Flags.h"
+
+namespace paddle {
+
+template <typename T>
+inline bool useGPU(const T& conf, int deviceID) {
+  return useGPU(conf.cmd_args(), deviceID);
+}
+
+template <>
+inline bool useGPU(const CMDArguments& args, int deviceID) {
+  return args.parallel_nn() ? deviceID >= 0 : FLAGS_use_gpu;
+}
+
+}  // namespace paddle
diff --git a/paddle/utils/Util.h b/paddle/utils/Util.h
index 613844669d249..b56fbcbc14dae 100644
--- a/paddle/utils/Util.h
+++ b/paddle/utils/Util.h
@@ -245,11 +245,6 @@ class AsyncGpuBlock {
 private:
   bool syncFlag_;
 };
-
-inline bool useGpu(int deviceId) {
-  return FLAGS_parallel_nn ? (deviceId >= 0 ? true : false) : FLAGS_use_gpu;
-}
-
 /*
  * hppl activation mode
  */
diff --git a/proto/CMDArguments.proto b/proto/CMDArguments.proto
new file mode 100644
index 0000000000000..94c804266500e
--- /dev/null
+++ b/proto/CMDArguments.proto
@@ -0,0 +1,19 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+syntax = "proto2";
+package paddle;
+
+message CMDArguments {
+  optional bool parallel_nn = 1 [default = false];
+}
\ No newline at end of file
diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt
index 62d5b9e38b21e..8846aaadfecb9 100644
--- a/proto/CMakeLists.txt
+++ b/proto/CMakeLists.txt
@@ -5,7 +5,8 @@ set(proto_filenames
     ParameterConfig.proto
     ParameterService.proto
     TrainerConfig.proto
-    ParameterServerConfig.proto)
+    ParameterServerConfig.proto
+    CMDArguments.proto)
 
 set(PROTO_GEN)
 set(PROTO_GEN_PY)
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 3a9d339976fff..660192409222e 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -14,6 +14,7 @@ limitations under the License. */
 syntax = "proto2";
 
 import "ParameterConfig.proto";
+import "CMDArguments.proto";
 
 package paddle;
 
@@ -433,6 +434,9 @@ message LayerConfig {
 
   // blank label used in ctc loss
   optional uint32 blank = 52 [default = 0];
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 53;
 }
 
 message EvaluatorConfig {
@@ -467,6 +471,9 @@ message EvaluatorConfig {
   // Used by ChunkEvaluator
   // chunk of these types are not counted
   repeated int32 excluded_chunk_types = 12;
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 13;
 }
 
 message LinkConfig {
@@ -554,4 +561,7 @@ message ModelConfig {
   // For External Machine, defining how to split a neural network
   // into multiple parts.
   optional ExternalConfig external_config = 9;
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 10;
 };
diff --git a/proto/ParameterConfig.proto b/proto/ParameterConfig.proto
index cbcd0af598df2..cd14dde249185 100644
--- a/proto/ParameterConfig.proto
+++ b/proto/ParameterConfig.proto
@@ -15,6 +15,9 @@ syntax = "proto2";
 
 package paddle;
 
+
+import "CMDArguments.proto";
+
 /**
  * Configuration structure for parameter
 */
@@ -77,4 +80,7 @@ message ParameterConfig {
   optional bool is_shared = 23 [default = false];
   // parameter block size
   optional uint64 parameter_block_size = 24 [default = 0];
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 25;
 }
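
For readers skimming the patch, the sketch below illustrates the pattern the diff introduces: command line options are copied into a CMDArguments message that travels inside the model, layer, evaluator, and parameter configs, and a useGPU(conf, deviceId) helper that reads those fields replaces the old useGpu(deviceId), which consulted the FLAGS_parallel_nn and FLAGS_use_gpu globals. The sketch is illustrative only and is not part of the patch: CmdArguments and LayerConfigLike are plain structs standing in for the generated protobuf classes, and g_use_gpu stands in for the gflags-defined FLAGS_use_gpu.

// sketch.cpp : standalone illustration of passing CLI options through a config
// object instead of reading process-wide globals. Not Paddle code.
#include <iostream>

namespace sketch {

// Stand-in for the generated CMDArguments protobuf message.
struct CmdArguments {
  bool parallel_nn_ = false;
  bool parallel_nn() const { return parallel_nn_; }
};

// Stand-in for ModelConfig/LayerConfig: any config exposing cmd_args().
struct LayerConfigLike {
  CmdArguments cmd_args_;
  const CmdArguments& cmd_args() const { return cmd_args_; }
};

// Stand-in for the gflags global FLAGS_use_gpu.
bool g_use_gpu = true;

// Direct overload (mirrors the CMDArguments case in ProtoCMDArgs.h): with
// parallel_nn enabled, only non-negative device ids run on the GPU;
// otherwise the global GPU switch decides.
inline bool useGPU(const CmdArguments& args, int deviceId) {
  return args.parallel_nn() ? deviceId >= 0 : g_use_gpu;
}

// Generic version: anything carrying a cmd_args() accessor can be queried,
// so layers, evaluators, and parameters all ask their own config.
template <typename Conf>
inline bool useGPU(const Conf& conf, int deviceId) {
  return useGPU(conf.cmd_args(), deviceId);
}

}  // namespace sketch

int main() {
  sketch::LayerConfigLike conf;
  conf.cmd_args_.parallel_nn_ = true;
  std::cout << std::boolalpha
            << sketch::useGPU(conf, -1) << "\n"   // false: layer stays on CPU
            << sketch::useGPU(conf, 0) << "\n";   // true: layer pinned to GPU 0
  return 0;
}

As far as the diff itself shows, the design choice is to thread these values through the existing config protos so that code which already receives a ModelConfig, LayerConfig, EvaluatorConfig, or ParameterConfig no longer needs to reach for process-wide gflags state.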