diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index c541b72e104bf..0e494cf6d8279 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -22,12 +22,13 @@ limitations under the License. */
 #include
 #include "paddle/parameter/Argument.h"
 #include "paddle/utils/ClassRegistrar.h"
-
 #include "paddle/utils/Logging.h"
+#include "paddle/utils/ProtoCMDArgs.h"
 
 namespace paddle {
 
-static ClassRegistrar gActivationRegistrar;
+static ClassRegistrar
+    gActivationRegistrar;
 /**
  * @def ACTIVATION_CLASS_NAME
  * @brief Macro for getting derived activation class name
@@ -39,13 +40,19 @@ static ClassRegistrar gActivationRegistrar;
  * @def BEGIN_DEFINE_ACTIVATION
  * @brief Macro for defining a devried activation class
 */
-#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                              \
+#define BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME)                 \
   class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction {  \
   private:                                                                     \
     static const std::string name;                                            \
                                                                                \
   public:                                                                      \
     const std::string& getName() const { return name; }
+
+#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                              \
+  BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME)                       \
+  explicit ACTIVATION_CLASS_NAME(ACTIVATION_NAME)(const CMDArguments& args)   \
+      : ActivationFunction(args) {}
+
 /**
  * @def END_DEFINE_ACTIVATION
  * @brief Macro for registering a derived activation class
@@ -68,7 +75,10 @@ static ClassRegistrar gActivationRegistrar;
  */
 class IdentityActivation : public ActivationFunction {
 public:
+  explicit IdentityActivation(const CMDArguments& args)
+      : ActivationFunction(args) {}
   static const std::string name;
+
   Error __must_check forward(Argument& act) {
     (void)act;
     return Error();
@@ -79,6 +89,7 @@ class IdentityActivation : public ActivationFunction {
   }
   const std::string& getName() const { return name; }
 };
+
 const std::string IdentityActivation::name = "";
 static InitFunction __reg_activation__identity([] {
   gActivationRegistrar.registerClass("");
@@ -132,18 +143,18 @@ Error __must_check backward(Argument& act) {
                            outputG->getHeight(),
                            outputG->getWidth(),
                            /* trans */ false,
-                           useGpu(act.deviceId));
+                           useGPU(this->cmdArgs_, act.deviceId));
   Matrix::resizeOrCreate(sftMaxSum_,
                          outputG->getHeight(),
                          1,
                          /* trans */ false,
-                         useGpu(act.deviceId));
+                         useGPU(this->cmdArgs_, act.deviceId));
   if (!one_ || one_->getWidth() != outputG->getWidth()) {
     Matrix::resizeOrCreate(one_,
                            1,
                            outputG->getWidth(),
                            /* trans */ false,
-                           useGpu(act.deviceId));
+                           useGPU(this->cmdArgs_, act.deviceId));
     one_->one();
   }
 
@@ -161,12 +172,16 @@ END_DEFINE_ACTIVATION(softmax)
 * @note Softmax on all frames of one sequence.
 *       Width of frame must be one.
 */
-BEGIN_DEFINE_ACTIVATION(sequence_softmax)
+BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(sequence_softmax)
private:
ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;

public:
+explicit ACTIVATION_CLASS_NAME(sequence_softmax)(const CMDArguments& args)
+    : ActivationFunction(args),
+      softmax_(ACTIVATION_CLASS_NAME(softmax)(args)) {}
+
Error __must_check forward(Argument& act) {
  if (act.value->getWidth() != 1UL) {
    return Error(
@@ -178,15 +193,16 @@ Error __must_check forward(Argument& act) {
                                     /* height= */ 1,
                                     1,
                                     /* trans= */ false,
-                                    useGpu(act.deviceId));
+                                    useGPU(this->cmdArgs_, act.deviceId));
    argument_.grad = Matrix::create(nullptr,
                                    /* height= */ 1,
                                    1,
                                    /* trans= */ false,
-                                   useGpu(act.deviceId));
+                                   useGPU(this->cmdArgs_, act.deviceId));
  }
 
-  auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
+  auto starts = act.sequenceStartPositions->getVector(
+      useGPU(this->cmdArgs_, act.deviceId));
  act.value->sequenceSoftmax(*act.value, *starts);
  return Error();
 }
@@ -285,12 +301,13 @@ END_DEFINE_ACTIVATION(tanh)
 * f(z) = 1.7159 * tanh(2/3*z)
 * \f]
 */
-BEGIN_DEFINE_ACTIVATION(stanh)
+BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(stanh)
private:
real a, b;

public:
-ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
+ACTIVATION_CLASS_NAME(stanh)
+(const CMDArguments& args) : ActivationFunction(args), a(1.7159), b(2. / 3.) {}
Error __must_check forward(Argument& act) {
  act.value->scaledTanh(*act.value, a, b);
  return Error();
@@ -339,7 +356,7 @@ Error __must_check forward(Argument& act) {
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
-                        useGpu(act.deviceId));
+                        useGPU(this->cmdArgs_, act.deviceId));
 
  act.in->copyFrom(*act.value);
  act.value->abs2(*act.value);
@@ -365,7 +382,7 @@ Error __must_check forward(Argument& act) {
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
-                        useGpu(act.deviceId));
+                        useGPU(this->cmdArgs_, act.deviceId));
 
  act.in->copyFrom(*act.value);
  act.value->square2(*act.value);
@@ -409,7 +426,7 @@ Error __must_check forward(Argument& act) {
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
-                        useGpu(act.deviceId));
+                        useGPU(this->cmdArgs_, act.deviceId));
 
  act.in->copyFrom(*act.value);
  act.value->log2(*act.value);
@@ -422,8 +439,9 @@ Error __must_check backward(Argument& act) {
 }
 END_DEFINE_ACTIVATION(log)
 
-ActivationFunction* ActivationFunction::create(const std::string& type) {
-  return gActivationRegistrar.createByType(type);
+ActivationFunction* ActivationFunction::create(const std::string& type,
+                                               const CMDArguments& args) {
+  return gActivationRegistrar.createByType(type, args);
 }
 
 std::vector ActivationFunction::getAllRegisteredTypes() {
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index f208224e304a7..eff28aed33b11 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include
 #include
+#include "CMDArguments.pb.h"
 #include "paddle/utils/Error.h"
 
 namespace paddle {
@@ -32,10 +33,12 @@ struct Argument;
  */
 class ActivationFunction {
 public:
-  static ActivationFunction* create(const std::string& type);
+  static ActivationFunction* create(const std::string& type,
+                                    const CMDArguments& args);
 
   static std::vector getAllRegisteredTypes();
 
-  ActivationFunction() {}
+  explicit ActivationFunction(const CMDArguments& cmdArgs)
+      : cmdArgs_(cmdArgs) {}
 
   virtual ~ActivationFunction() {}
@@ -61,6 +64,9 @@ class ActivationFunction {
   virtual Error __must_check backward(Argument& act) = 0;
 
   virtual const std::string& getName() const = 0;
+
+protected:
+  const CMDArguments& cmdArgs_;
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/gserver/evaluators/Evaluator.cpp
index ae7508e2bb117..129e7d36befbd 100644
--- a/paddle/gserver/evaluators/Evaluator.cpp
+++ b/paddle/gserver/evaluators/Evaluator.cpp
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/gserver/evaluators/Evaluator.h"
-#include "paddle/utils/Stat.h"
-
 #include "paddle/gserver/gradientmachines/NeuralNetwork.h"
+#include "paddle/utils/ProtoCMDArgs.h"
+#include "paddle/utils/Stat.h"
 
 DECLARE_int32(trainer_id);
 
@@ -72,10 +72,11 @@ class ClassificationErrorEvaluator : public Evaluator {
     CHECK_EQ((size_t)1, weight->getWidth());
   }
 
-  const MatrixPtr errorMat = Matrix::create(output->getHeight(),
-                                            1,
-                                            /* trans= */ false,
-                                            useGpu(arguments[0].deviceId));
+  const MatrixPtr errorMat =
+      Matrix::create(output->getHeight(),
+                     1,
+                     /* trans= */ false,
+                     useGPU(config_, arguments[0].deviceId));
   errorMat->zeroMem();
   if (label != nullptr) {
     errorMat->classificationError(*output, *label);
diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp
index 36ca05b919b13..54d849fd9b5f4 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/GradientMachine.cpp
@@ -42,7 +42,7 @@ GradientMachine* GradientMachine::create(
   if (config.type() == "multi_nn") {
     /* multi submodel calculate, thread(s) will be initialized inside */
     nn = new MultiNetwork("root");
-  } else if (FLAGS_parallel_nn) {
+  } else if (config.cmd_args().parallel_nn()) {
     /* multi threads calculate */
     nn = new ParallelNeuralNetwork();
   } else {
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 80f223824d8dc..e1b17b53453a0 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -399,7 +399,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
   SetDevice gpuDevice(deviceId_);
 
   NeuralNetwork* nn = nullptr;
-  if (!multiMachine->useGpu() || !FLAGS_parallel_nn) {
+  if (!multiMachine->useGpu() || !config.cmd_args().parallel_nn()) {
     nn = NeuralNetwork::create(config);
   } else {
     nn = new ParallelNeuralNetwork();
@@ -422,7 +422,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
   nn->init(config_, slaveParamInitCb);
   gradientMachine_.reset(nn);
   parameters_ = gradientMachine_->getParameters();
-  if (!FLAGS_parallel_nn) {
+  if (!config.cmd_args().parallel_nn()) {
     for (auto& para : parameters_) {
       para->setDevice(deviceId_);
     }
@@ -744,7 +744,7 @@ void TrainerThread::copyInArgs() {
         fullInArgs[i],
         startSeq,
         copySize,
-        FLAGS_parallel_nn ? false : multiMachine_->useGpu());
+        config_.cmd_args().parallel_nn() ? false : multiMachine_->useGpu());
   }
 }
 
diff --git a/paddle/gserver/gradientmachines/MultiNetwork.cpp b/paddle/gserver/gradientmachines/MultiNetwork.cpp
index 5f52a5f3d48a4..57625c8ee0ca3 100644
--- a/paddle/gserver/gradientmachines/MultiNetwork.cpp
+++ b/paddle/gserver/gradientmachines/MultiNetwork.cpp
@@ -38,7 +38,7 @@ void MultiNetwork::init(const ModelConfig& config,
   // sub networks
   for (int i = 1; i < config.sub_models_size(); ++i) {
     std::string subModelName = config.sub_models(i).name();
-    if (FLAGS_parallel_nn) {
+    if (config.cmd_args().parallel_nn()) {
       subNetworks_[i - 1] = std::unique_ptr(
           new ParallelNeuralNetwork(subModelName, this));
     } else {
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 22051e07ee002..54d3854bee038 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -208,7 +208,7 @@ void NeuralNetwork::prefetch(const std::vector& inArgs) {
   }
 
   for (size_t i = 0; i != dataLayers_.size(); ++i) {
-    if (FLAGS_parallel_nn) {
+    if (this->config_.cmd_args().parallel_nn()) {
       const_cast(inArgs[i]).deviceId = -1;
     }
     dataLayers_[i]->setData(inArgs[i]);
diff --git a/paddle/gserver/layers/DataLayer.cpp b/paddle/gserver/layers/DataLayer.cpp
index 3551df4e172f0..1fbcc2056a483 100644
--- a/paddle/gserver/layers/DataLayer.cpp
+++ b/paddle/gserver/layers/DataLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "DataLayer.h"
-
+#include "paddle/utils/ProtoCMDArgs.h"
 namespace paddle {
 
 REGISTER_LAYER(data, DataLayer);
@@ -30,7 +30,7 @@ void DataLayer::copyDataToOutput(Argument& output) {
   if (!output.value) {
     output.value = data_.value->clone(data_.value->getHeight(),
                                       data_.value->getWidth(),
-                                      useGpu(output.deviceId));
+                                      useGPU(config_, output.deviceId));
   } else {
     output.value->resize(data_.value->getHeight(), data_.value->getWidth());
   }
@@ -41,11 +41,11 @@ void DataLayer::copyDataToOutput(Argument& output) {
                              data_.grad->getHeight(),
                              data_.grad->getWidth(),
                              /* trans= */ false,
-                             useGpu(output.deviceId));
+                             useGPU(config_, output.deviceId));
   }
   if (data_.ids) {
     IVector::resizeOrCreate(
-        output.ids, data_.ids->getSize(), useGpu(output.deviceId));
+        output.ids, data_.ids->getSize(), useGPU(config_, output.deviceId));
     output.ids->copyFrom(*data_.ids);
   }
 }
diff --git a/paddle/gserver/layers/GatedRecurrentLayer.cpp b/paddle/gserver/layers/GatedRecurrentLayer.cpp
index d3aeea921801d..cab86634afa29 100644
--- a/paddle/gserver/layers/GatedRecurrentLayer.cpp
+++ b/paddle/gserver/layers/GatedRecurrentLayer.cpp
@@ -36,7 +36,8 @@ bool GatedRecurrentLayer::init(const LayerMap& layerMap,
   }
 
   reversed_ = config_.reversed();
-  activationGate_.reset(ActivationFunction::create(config_.active_gate_type()));
+  activationGate_.reset(ActivationFunction::create(config_.active_gate_type(),
+                                                   config_.cmd_args()));
 
   GruCompute::init(config_);
   useBatch_ = true;
diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
index d62a8d846e5b3..ae7c2c70e1ed1 100644
--- a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
+++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
*/ #include "HierarchicalSigmoidLayer.h" -#include "paddle/utils/Util.h" +#include "paddle/utils/ProtoCMDArgs.h" namespace paddle { @@ -64,12 +64,12 @@ void HierarchicalSigmoidLayer::forward(PassType passType) { batchSize, codeLength_, /* trans */ false, - useGpu(deviceId_)); + useGPU(config_, deviceId_)); Matrix::resizeOrCreate(preOutput_.grad, batchSize, codeLength_, /* trans */ false, - useGpu(deviceId_)); + useGPU(config_, deviceId_)); IVectorPtr label = getInput(*getLabelLayer()).ids; @@ -91,8 +91,8 @@ void HierarchicalSigmoidLayer::forward(PassType passType) { *output_.value, -1); // scaleSum preOutput_.value->softrelu(*preOutput_.value); - MatrixPtr sum = - Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_)); + MatrixPtr sum = Matrix::create( + batchSize, 1, /* trans= */ false, useGPU(config_, deviceId_)); preOutput_.value->rowSum(*sum); output_.value->add(*sum); } diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index f76d41ad3e8a3..791cf19c1cb6e 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Util.h" - #include "paddle/math/SparseMatrix.h" #include "paddle/utils/Error.h" #include "paddle/utils/Logging.h" +#include "paddle/utils/ProtoCMDArgs.h" +#include "paddle/utils/Util.h" #include "AddtoLayer.h" #include "CRFLayer.h" @@ -45,7 +45,7 @@ Layer::Layer(const LayerConfig& config, bool useGpu) needSequenceInfo_(true) {} bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - if (useGpu_ && FLAGS_parallel_nn) { + if (useGpu_ && config_.cmd_args().parallel_nn()) { /* gpu environment is specified by device property */ deviceId_ = config_.device(); if (deviceId_ < 0) { @@ -95,7 +95,8 @@ bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* specify the activation function according to the configuration */ std::string action_type = config_.active_type(); - activation_.reset(ActivationFunction::create(action_type)); + activation_.reset( + ActivationFunction::create(action_type, config_.cmd_args())); CHECK(activation_); initNeedFlags(); @@ -130,15 +131,21 @@ void Layer::resetSpecifyOutput(Argument& output, bool isGradClean) { SetDevice device(output.deviceId); - Matrix::resizeOrCreate( - output.value, height, width, /* trans */ false, useGpu(output.deviceId)); + Matrix::resizeOrCreate(output.value, + height, + width, + /* trans */ false, + useGPU(config_, output.deviceId)); if (isValueClean) { output.value->zeroMem(); } if (passType_ != PASS_TEST && needGradient()) { - Matrix::resizeOrCreate( - output.grad, height, width, /* trans */ false, useGpu(output.deviceId)); + Matrix::resizeOrCreate(output.grad, + height, + width, + /* trans */ false, + useGPU(config_, output.deviceId)); if (isGradClean) { output.grad->zeroMem(); } @@ -234,7 +241,7 @@ void Layer::waitAndMergeOutputGrad() { output_.grad->getHeight(), output_.grad->getWidth(), /* trans */ false, - useGpu(output_.deviceId)); + useGPU(config_, output_.deviceId)); for (; i != outputOtherDevice_.size(); i++) { tmpGrad_->copyFrom(*outputOtherDevice_[i].grad, HPPL_STREAM_1); @@ -388,15 +395,17 @@ void Layer::forwardDropOut() { outV->getHeight(), outV->getWidth(), false, - useGpu(deviceId_)); + useGPU(config_, deviceId_)); dropOutMask_->randomizeUniform(); // generate a uniform random matrix 
     dropOutMask_->biggerThanScalar(config_.drop_rate());  // random mask
     outV->dotMul(*outV, *dropOutMask_);                    // dropout
   } else if (passType_ == PASS_GC) {
     // only initialize once
     if (!dropOutMask_) {
-      dropOutMask_ = Matrix::create(
-          outV->getHeight(), outV->getWidth(), false, useGpu(deviceId_));
+      dropOutMask_ = Matrix::create(outV->getHeight(),
+                                    outV->getWidth(),
+                                    false,
+                                    useGPU(config_, deviceId_));
       // We use cpu matrix to generate mask so that the mask
       // will be same for both gpu version and cpu version.
       // This will help unittest to make sure they have same result.
diff --git a/paddle/gserver/layers/LstmLayer.cpp b/paddle/gserver/layers/LstmLayer.cpp
index 01cc5fec8b970..8642d53fd2495 100644
--- a/paddle/gserver/layers/LstmLayer.cpp
+++ b/paddle/gserver/layers/LstmLayer.cpp
@@ -93,7 +93,7 @@ bool LstmLayer::init(const LayerMap &layerMap,
   reversed_ = config_.reversed();
 
   // create IdentityActivation for using drop_rate
-  activation_.reset(ActivationFunction::create(""));
+  activation_.reset(ActivationFunction::create("", config_.cmd_args()));
 
   LstmCompute::init(config_);
   useBatch_ = true;
diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp
index 88d934d782b54..69dc0bce1ae79 100644
--- a/paddle/gserver/layers/MDLstmLayer.cpp
+++ b/paddle/gserver/layers/MDLstmLayer.cpp
@@ -298,9 +298,10 @@ bool MDLstmLayer::init(const LayerMap& layerMap,
   for (int i = 0; i < numDims_; i++) {
     delays_.push_back(-1);
   }
-  activationGate_.reset(ActivationFunction::create(config_.active_gate_type()));
-  activationState_.reset(
-      ActivationFunction::create(config_.active_state_type()));
+  activationGate_.reset(ActivationFunction::create(config_.active_gate_type(),
+                                                   config_.cmd_args()));
+  activationState_.reset(ActivationFunction::create(config_.active_state_type(),
+                                                    config_.cmd_args()));
 
   return true;
 }
diff --git a/paddle/gserver/layers/MaxLayer.cpp b/paddle/gserver/layers/MaxLayer.cpp
index 23629e1986834..fcbe05ba3057d 100644
--- a/paddle/gserver/layers/MaxLayer.cpp
+++ b/paddle/gserver/layers/MaxLayer.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
*/ #include "MaxLayer.h" #include "paddle/utils/Logging.h" +#include "paddle/utils/ProtoCMDArgs.h" #include "paddle/utils/Stat.h" namespace paddle { @@ -24,7 +25,7 @@ void MaxLayer::forward(PassType passType) { SequencePoolLayer::forward(passType); IVector::resizeOrCreate( - maxIndex_, newBatchSize_ * getSize(), useGpu(deviceId_)); + maxIndex_, newBatchSize_ * getSize(), useGPU(config_, deviceId_)); maxIndex_->zeroMem(); MatrixPtr inputValue = getInputValue(0); diff --git a/paddle/gserver/tests/test_WarpCTCLayer.cpp b/paddle/gserver/tests/test_WarpCTCLayer.cpp index 55427e2f12fd7..327803121a4f3 100644 --- a/paddle/gserver/tests/test_WarpCTCLayer.cpp +++ b/paddle/gserver/tests/test_WarpCTCLayer.cpp @@ -146,7 +146,8 @@ LayerPtr createCTCLayer(string name, layerMap[layer->getName()] = layer; layer->init(layerMap, parameterMap); - ActivationFunction* softmaxActivation = ActivationFunction::create("softmax"); + ActivationFunction* softmaxActivation = + ActivationFunction::create("softmax", layerConfig.cmd_args()); softmaxActivation->forward(dataLayer->getOutput()).check(); layer->forward(PASS_GC); diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index 29d6e20dc1696..91d31982e7a2e 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -49,7 +49,7 @@ Parameter::Parameter(const ParameterConfig& config, bool useGpu, bool doInit) updateCounter_(0), updated_(false) { setID(-1); /* capture uninitialized id */ - if (useGpu_ && FLAGS_parallel_nn) { + if (useGpu_ && config_.cmd_args().parallel_nn()) { /* gpu environment is specified by device property */ deviceId_ = config_.device(); if (deviceId_ < 0) { diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index 60ac8459a12db..864734dc7444b 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -101,6 +101,15 @@ bool TrainerConfigHelper::hasTestDataConfig() const { return m->conf.has_test_data_config(); } +template +static void updateCMDArgs(T *mutableConf) { + if (mutableConf->has_cmd_args()) { + mutableConf->set_allocated_cmd_args(new paddle::CMDArguments()); + } + paddle::CMDArguments &args = *mutableConf->mutable_cmd_args(); + args.set_parallel_nn(FLAGS_parallel_nn); +} + void TrainerConfigHelper::updateConfigFromFlags() { if (!FLAGS_save_dir.empty()) { m->conf.set_save_dir(FLAGS_save_dir); @@ -111,6 +120,14 @@ void TrainerConfigHelper::updateConfigFromFlags() { if (FLAGS_start_pass != 0) { m->conf.set_start_pass(FLAGS_start_pass); } + updateCMDArgs(m->conf.mutable_model_config()); + for (auto paramConf : *m->conf.mutable_model_config()->mutable_parameters()) { + updateCMDArgs(¶mConf); + } + for (auto evaluatorConf : + *m->conf.mutable_model_config()->mutable_evaluators()) { + updateCMDArgs(&evaluatorConf); + } } void TrainerConfigHelper::disableRemoteSparseUpdater() { diff --git a/paddle/utils/ProtoCMDArgs.h b/paddle/utils/ProtoCMDArgs.h new file mode 100644 index 0000000000000..dda10da2ac118 --- /dev/null +++ b/paddle/utils/ProtoCMDArgs.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "CMDArguments.pb.h"
+#include "Flags.h"
+
+namespace paddle {
+
+template <typename T>
+inline bool useGPU(const T& conf, int deviceID) {
+  return useGPU(conf.cmd_args(), deviceID);
+}
+
+template <>
+inline bool useGPU(const CMDArguments& args, int deviceID) {
+  return args.parallel_nn() ? deviceID >= 0 : FLAGS_use_gpu;
+}
+
+}  // namespace paddle
diff --git a/paddle/utils/Util.h b/paddle/utils/Util.h
index 613844669d249..b56fbcbc14dae 100644
--- a/paddle/utils/Util.h
+++ b/paddle/utils/Util.h
@@ -245,11 +245,6 @@ class AsyncGpuBlock {
 private:
   bool syncFlag_;
 };
-
-inline bool useGpu(int deviceId) {
-  return FLAGS_parallel_nn ? (deviceId >= 0 ? true : false) : FLAGS_use_gpu;
-}
-
 /*
  * hppl activation mode
  */
diff --git a/proto/CMDArguments.proto b/proto/CMDArguments.proto
new file mode 100644
index 0000000000000..94c804266500e
--- /dev/null
+++ b/proto/CMDArguments.proto
@@ -0,0 +1,19 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+syntax = "proto2";
+package paddle;
+
+message CMDArguments {
+  optional bool parallel_nn = 1 [default = false];
+}
\ No newline at end of file
diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt
index 62d5b9e38b21e..8846aaadfecb9 100644
--- a/proto/CMakeLists.txt
+++ b/proto/CMakeLists.txt
@@ -5,7 +5,8 @@ set(proto_filenames
     ParameterConfig.proto
     ParameterService.proto
     TrainerConfig.proto
-    ParameterServerConfig.proto)
+    ParameterServerConfig.proto
+    CMDArguments.proto)
 
 set(PROTO_GEN)
 set(PROTO_GEN_PY)
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 3a9d339976fff..660192409222e 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -14,6 +14,7 @@ limitations under the License. */
 syntax = "proto2";
 
 import "ParameterConfig.proto";
+import "CMDArguments.proto";
 
 package paddle;
 
@@ -433,6 +434,9 @@ message LayerConfig {
 
   // blank label used in ctc loss
   optional uint32 blank = 52 [default = 0];
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 53;
 }
 
 message EvaluatorConfig {
@@ -467,6 +471,9 @@ message EvaluatorConfig {
   // Used by ChunkEvaluator
   // chunk of these types are not counted
   repeated int32 excluded_chunk_types = 12;
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 13;
 }
 
 message LinkConfig {
@@ -554,4 +561,7 @@ message ModelConfig {
   // For External Machine, defining how to split a neural network
   // into multiple parts.
   optional ExternalConfig external_config = 9;
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 10;
 };
diff --git a/proto/ParameterConfig.proto b/proto/ParameterConfig.proto
index cbcd0af598df2..cd14dde249185 100644
--- a/proto/ParameterConfig.proto
+++ b/proto/ParameterConfig.proto
@@ -15,6 +15,9 @@ syntax = "proto2";
 
 package paddle;
 
+
+import "CMDArguments.proto";
+
 /**
  * Configuration structure for parameter
 */
@@ -77,4 +80,7 @@ message ParameterConfig {
   optional bool is_shared = 23 [default = false];
   // parameter block size
   optional uint64 parameter_block_size = 24 [default = 0];
+
+  // To pass command line arguments through Paddle code.
+  optional CMDArguments cmd_args = 25;
 }
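
For readers skimming the patch, the sketch below illustrates the pattern the diff introduces: command line options are copied into a CMDArguments message that travels inside the model, layer, evaluator, and parameter configs, and a useGPU(conf, deviceId) helper that reads those fields replaces the old useGpu(deviceId), which consulted the FLAGS_parallel_nn and FLAGS_use_gpu globals. The sketch is illustrative only and is not part of the patch: CmdArguments and LayerConfigLike are plain structs standing in for the generated protobuf classes, and g_use_gpu stands in for the gflags-defined FLAGS_use_gpu.

// sketch.cpp : standalone illustration of passing CLI options through a config
// object instead of reading process-wide globals. Not Paddle code.
#include <iostream>

namespace sketch {

// Stand-in for the generated CMDArguments protobuf message.
struct CmdArguments {
  bool parallel_nn_ = false;
  bool parallel_nn() const { return parallel_nn_; }
};

// Stand-in for ModelConfig/LayerConfig: any config exposing cmd_args().
struct LayerConfigLike {
  CmdArguments cmd_args_;
  const CmdArguments& cmd_args() const { return cmd_args_; }
};

// Stand-in for the gflags global FLAGS_use_gpu.
bool g_use_gpu = true;

// Direct overload (mirrors the CMDArguments case in ProtoCMDArgs.h): with
// parallel_nn enabled, only non-negative device ids run on the GPU;
// otherwise the global GPU switch decides.
inline bool useGPU(const CmdArguments& args, int deviceId) {
  return args.parallel_nn() ? deviceId >= 0 : g_use_gpu;
}

// Generic version: anything carrying a cmd_args() accessor can be queried,
// so layers, evaluators, and parameters all ask their own config.
template <typename Conf>
inline bool useGPU(const Conf& conf, int deviceId) {
  return useGPU(conf.cmd_args(), deviceId);
}

}  // namespace sketch

int main() {
  sketch::LayerConfigLike conf;
  conf.cmd_args_.parallel_nn_ = true;
  std::cout << std::boolalpha
            << sketch::useGPU(conf, -1) << "\n"   // false: layer stays on CPU
            << sketch::useGPU(conf, 0) << "\n";   // true: layer pinned to GPU 0
  return 0;
}

As far as the diff itself shows, the design choice is to thread these values through the existing config protos so that code which already receives a ModelConfig, LayerConfig, EvaluatorConfig, or ParameterConfig no longer needs to reach for process-wide gflags state.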