Feature/try to remove parallel nn #1198

Closed
52 changes: 35 additions & 17 deletions paddle/gserver/activations/ActivationFunction.cpp
@@ -22,12 +22,13 @@ limitations under the License. */
#include <type_traits>
#include "paddle/parameter/Argument.h"
#include "paddle/utils/ClassRegistrar.h"

#include "paddle/utils/Logging.h"
#include "paddle/utils/ProtoCMDArgs.h"

namespace paddle {

static ClassRegistrar<ActivationFunction> gActivationRegistrar;
static ClassRegistrar<ActivationFunction, const CMDArguments&>
gActivationRegistrar;

[Review comment from a Collaborator on the line above] "CMD" is not an acronym, so it should not be written in all caps. Also, "命令行参数" translates to "command-line arguments", not "command arguments".

/**
* @def ACTIVATION_CLASS_NAME
* @brief Macro for getting derived activation class name
Expand All @@ -39,13 +40,19 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
* @def BEGIN_DEFINE_ACTIVATION
* @brief Macro for defining a derived activation class
*/
#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME) \
#define BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME) \
class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
private: \
static const std::string name; \
\
public: \
const std::string& getName() const { return name; }

#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME) \
BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(ACTIVATION_NAME) \
explicit ACTIVATION_CLASS_NAME(ACTIVATION_NAME)(const CMDArguments& args) \
: ActivationFunction(args) {}

/**
* @def END_DEFINE_ACTIVATION
* @brief Macro for registering a derived activation class
Expand All @@ -68,7 +75,10 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
*/
class IdentityActivation : public ActivationFunction {
public:
explicit IdentityActivation(const CMDArguments& args)
: ActivationFunction(args) {}
static const std::string name;

Error __must_check forward(Argument& act) {
(void)act;
return Error();
Expand All @@ -79,6 +89,7 @@ class IdentityActivation : public ActivationFunction {
}
const std::string& getName() const { return name; }
};

const std::string IdentityActivation::name = "";
static InitFunction __reg_activation__identity([] {
gActivationRegistrar.registerClass<IdentityActivation>("");
Expand Down Expand Up @@ -132,18 +143,18 @@ Error __must_check backward(Argument& act) {
outputG->getHeight(),
outputG->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
Matrix::resizeOrCreate(sftMaxSum_,
outputG->getHeight(),
1,
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
if (!one_ || one_->getWidth() != outputG->getWidth()) {
Matrix::resizeOrCreate(one_,
1,
outputG->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
one_->one();
}

Expand All @@ -161,12 +172,16 @@ END_DEFINE_ACTIVATION(softmax)
* @note Softmax on all frames of one sequence.
* Width of frame must be one.
*/
BEGIN_DEFINE_ACTIVATION(sequence_softmax)
BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(sequence_softmax)
private:
ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;

public:
explicit ACTIVATION_CLASS_NAME(sequence_softmax)(const CMDArguments& args)
: ActivationFunction(args),
softmax_(ACTIVATION_CLASS_NAME(softmax)(args)) {}

Error __must_check forward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Error(
Expand All @@ -178,15 +193,16 @@ Error __must_check forward(Argument& act) {
/* height= */ 1,
1,
/* trans= */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
argument_.grad = Matrix::create(nullptr,
/* height= */ 1,
1,
/* trans= */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));
}

auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
auto starts = act.sequenceStartPositions->getVector(
useGPU(this->cmdArgs_, act.deviceId));
act.value->sequenceSoftmax(*act.value, *starts);
return Error();
}
Expand Down Expand Up @@ -285,12 +301,13 @@ END_DEFINE_ACTIVATION(tanh)
* f(z) = 1.7159 * tanh(2/3*z)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(stanh)
BEGIN_DEFINE_ACTIVATION_WITHOUT_CTOR(stanh)
private:
real a, b;

public:
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
ACTIVATION_CLASS_NAME(stanh)
(const CMDArguments& args) : ActivationFunction(args), a(1.7159), b(2. / 3.) {}
Error __must_check forward(Argument& act) {
act.value->scaledTanh(*act.value, a, b);
return Error();
Expand Down Expand Up @@ -339,7 +356,7 @@ Error __must_check forward(Argument& act) {
act.value->getHeight(),
act.value->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));

act.in->copyFrom(*act.value);
act.value->abs2(*act.value);
Expand All @@ -365,7 +382,7 @@ Error __must_check forward(Argument& act) {
act.value->getHeight(),
act.value->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));

act.in->copyFrom(*act.value);
act.value->square2(*act.value);
Expand Down Expand Up @@ -409,7 +426,7 @@ Error __must_check forward(Argument& act) {
act.value->getHeight(),
act.value->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
useGPU(this->cmdArgs_, act.deviceId));

act.in->copyFrom(*act.value);
act.value->log2(*act.value);
Expand All @@ -422,8 +439,9 @@ Error __must_check backward(Argument& act) {
}
END_DEFINE_ACTIVATION(log)

ActivationFunction* ActivationFunction::create(const std::string& type) {
return gActivationRegistrar.createByType(type);
ActivationFunction* ActivationFunction::create(const std::string& type,
const CMDArguments& args) {
return gActivationRegistrar.createByType(type, args);
}

std::vector<std::string> ActivationFunction::getAllRegisteredTypes() {
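
The recurring edit in this file swaps the old global-flag helper `useGpu(deviceId)` for `useGPU(...)`, which reads the GPU decision from the command-line arguments that are now threaded through explicitly. `paddle/utils/ProtoCMDArgs.h` is not shown in this PR, so the following is only a sketch of what such a header could contain; the `use_gpu` field and the config-forwarding overload (used as `useGPU(config_, ...)` in the evaluator and layer changes below) are assumptions, not code from the diff.

```cpp
// Hypothetical sketch of paddle/utils/ProtoCMDArgs.h -- not part of this diff.
// Assumes CMDArguments carries the former gflags as proto fields
// (parallel_nn is read elsewhere in this PR; use_gpu is assumed here).
#pragma once
#include "CMDArguments.pb.h"

namespace paddle {

// Intended to mirror the old useGpu(deviceId): in parallel_nn mode the
// layer's device id decides, otherwise the global use_gpu setting does.
// A deviceId < 0 conventionally means "run on CPU".
inline bool useGPU(const CMDArguments& args, int deviceId) {
  return args.parallel_nn() ? (deviceId >= 0) : args.use_gpu();
}

// Forwarding overload for configs that embed the arguments, e.g.
// useGPU(config_, deviceId_) where config_ exposes a cmd_args() sub-message.
template <typename ConfigT>
inline bool useGPU(const ConfigT& config, int deviceId) {
  return useGPU(config.cmd_args(), deviceId);
}

}  // namespace paddle
```
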
10 changes: 8 additions & 2 deletions paddle/gserver/activations/ActivationFunction.h
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "CMDArguments.pb.h"
#include "paddle/utils/Error.h"

namespace paddle {
Expand All @@ -32,10 +33,12 @@ struct Argument;
*/
class ActivationFunction {
public:
static ActivationFunction* create(const std::string& type);
static ActivationFunction* create(const std::string& type,
const CMDArguments& args);
static std::vector<std::string> getAllRegisteredTypes();

ActivationFunction() {}
explicit ActivationFunction(const CMDArguments& cmdArgs)
: cmdArgs_(cmdArgs) {}

virtual ~ActivationFunction() {}

Expand All @@ -61,6 +64,9 @@ class ActivationFunction {
virtual Error __must_check backward(Argument& act) = 0;

virtual const std::string& getName() const = 0;

protected:
const CMDArguments& cmdArgs_;
};

} // namespace paddle
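
One consequence of this header change: activations can no longer be default-constructed, and `cmdArgs_` is a reference member, so the `CMDArguments` message passed to `create()` has to outlive the activation (in this PR it lives inside the config protos held by the owning layer). A minimal caller-side sketch under that assumption; `makeActivation` is a hypothetical helper, not part of the diff:

```cpp
// Hypothetical usage of the new factory signature (not part of this diff).
#include <memory>
#include <string>
#include "paddle/gserver/activations/ActivationFunction.h"

std::unique_ptr<paddle::ActivationFunction> makeActivation(
    const std::string& type, const paddle::CMDArguments& args) {
  // The registrar forwards `args` into the activation's constructor, and the
  // activation keeps only a reference to it (cmdArgs_), so `args` must stay
  // alive as long as the returned object -- here it is expected to refer to a
  // cmd_args() sub-message owned by a long-lived config proto.
  return std::unique_ptr<paddle::ActivationFunction>(
      paddle::ActivationFunction::create(type, args));
}
```
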
13 changes: 7 additions & 6 deletions paddle/gserver/evaluators/Evaluator.cpp
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/gserver/evaluators/Evaluator.h"
#include "paddle/utils/Stat.h"

#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/utils/ProtoCMDArgs.h"
#include "paddle/utils/Stat.h"

DECLARE_int32(trainer_id);

Expand Down Expand Up @@ -72,10 +72,11 @@ class ClassificationErrorEvaluator : public Evaluator {
CHECK_EQ((size_t)1, weight->getWidth());
}

const MatrixPtr errorMat = Matrix::create(output->getHeight(),
1,
/* trans= */ false,
useGpu(arguments[0].deviceId));
const MatrixPtr errorMat =
Matrix::create(output->getHeight(),
1,
/* trans= */ false,
useGPU(config_, arguments[0].deviceId));
errorMat->zeroMem();
if (label != nullptr) {
errorMat->classificationError(*output, *label);
2 changes: 1 addition & 1 deletion paddle/gserver/gradientmachines/GradientMachine.cpp
@@ -42,7 +42,7 @@ GradientMachine* GradientMachine::create(
if (config.type() == "multi_nn") {
/* multi submodel calculate, thread(s) will be initialized inside */
nn = new MultiNetwork("root");
} else if (FLAGS_parallel_nn) {
} else if (config.cmd_args().parallel_nn()) {
/* multi threads calculate */
nn = new ParallelNeuralNetwork();
} else {
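
The gradient-machine dispatch now reads `parallel_nn` from the model config instead of the `FLAGS_parallel_nn` global, which implies the trainer copies the parsed command-line flags into a `CMDArguments` sub-message of `ModelConfig` before calling `create()`. That wiring is not part of this diff; below is a hedged sketch of what it might look like, assuming the protobuf-generated setters for the fields read in this PR (`use_gpu` is an assumed field, as in the note after ActivationFunction.cpp, and `fillCMDArguments` is an illustrative name, not an existing Paddle function).

```cpp
// Hypothetical trainer-side wiring (not shown in this PR): copy the parsed
// command-line options into the config proto so downstream code can stop
// reading process-wide gflags such as FLAGS_parallel_nn.
#include "ModelConfig.pb.h"

namespace paddle {

void fillCMDArguments(bool parallelNN, bool useGpu, ModelConfig* config) {
  // mutable_cmd_args() is the generated mutable accessor for the cmd_args
  // message field whose getters appear throughout this diff.
  CMDArguments* args = config->mutable_cmd_args();
  args->set_parallel_nn(parallelNN);
  args->set_use_gpu(useGpu);  // `use_gpu` is an assumed field
}

}  // namespace paddle
```
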
6 changes: 3 additions & 3 deletions paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -399,7 +399,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
SetDevice gpuDevice(deviceId_);

NeuralNetwork* nn = nullptr;
if (!multiMachine->useGpu() || !FLAGS_parallel_nn) {
if (!multiMachine->useGpu() || !config.cmd_args().parallel_nn()) {
nn = NeuralNetwork::create(config);
} else {
nn = new ParallelNeuralNetwork();
Expand All @@ -422,7 +422,7 @@ TrainerThread::TrainerThread(const ModelConfig& config,
nn->init(config_, slaveParamInitCb);
gradientMachine_.reset(nn);
parameters_ = gradientMachine_->getParameters();
if (!FLAGS_parallel_nn) {
if (!config.cmd_args().parallel_nn()) {
for (auto& para : parameters_) {
para->setDevice(deviceId_);
}
Expand Down Expand Up @@ -744,7 +744,7 @@ void TrainerThread::copyInArgs() {
fullInArgs[i],
startSeq,
copySize,
FLAGS_parallel_nn ? false : multiMachine_->useGpu());
config_.cmd_args().parallel_nn() ? false : multiMachine_->useGpu());
}
}

2 changes: 1 addition & 1 deletion paddle/gserver/gradientmachines/MultiNetwork.cpp
@@ -38,7 +38,7 @@ void MultiNetwork::init(const ModelConfig& config,
// sub networks
for (int i = 1; i < config.sub_models_size(); ++i) {
std::string subModelName = config.sub_models(i).name();
if (FLAGS_parallel_nn) {
if (config.cmd_args().parallel_nn()) {
subNetworks_[i - 1] = std::unique_ptr<ParallelNeuralNetwork>(
new ParallelNeuralNetwork(subModelName, this));
} else {
2 changes: 1 addition & 1 deletion paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -208,7 +208,7 @@ void NeuralNetwork::prefetch(const std::vector<Argument>& inArgs) {
}

for (size_t i = 0; i != dataLayers_.size(); ++i) {
if (FLAGS_parallel_nn) {
if (this->config_.cmd_args().parallel_nn()) {
const_cast<Argument&>(inArgs[i]).deviceId = -1;
}
dataLayers_[i]->setData(inArgs[i]);
8 changes: 4 additions & 4 deletions paddle/gserver/layers/DataLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "DataLayer.h"

#include "paddle/utils/ProtoCMDArgs.h"
namespace paddle {

REGISTER_LAYER(data, DataLayer);
Expand All @@ -30,7 +30,7 @@ void DataLayer::copyDataToOutput(Argument& output) {
if (!output.value) {
output.value = data_.value->clone(data_.value->getHeight(),
data_.value->getWidth(),
useGpu(output.deviceId));
useGPU(config_, output.deviceId));
} else {
output.value->resize(data_.value->getHeight(), data_.value->getWidth());
}
Expand All @@ -41,11 +41,11 @@ void DataLayer::copyDataToOutput(Argument& output) {
data_.grad->getHeight(),
data_.grad->getWidth(),
/* trans= */ false,
useGpu(output.deviceId));
useGPU(config_, output.deviceId));
}
if (data_.ids) {
IVector::resizeOrCreate(
output.ids, data_.ids->getSize(), useGpu(output.deviceId));
output.ids, data_.ids->getSize(), useGPU(config_, output.deviceId));
output.ids->copyFrom(*data_.ids);
}
}
3 changes: 2 additions & 1 deletion paddle/gserver/layers/GatedRecurrentLayer.cpp
@@ -36,7 +36,8 @@ bool GatedRecurrentLayer::init(const LayerMap& layerMap,
}

reversed_ = config_.reversed();
activationGate_.reset(ActivationFunction::create(config_.active_gate_type()));
activationGate_.reset(ActivationFunction::create(config_.active_gate_type(),
config_.cmd_args()));

GruCompute::init(config_);
useBatch_ = true;
10 changes: 5 additions & 5 deletions paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "HierarchicalSigmoidLayer.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/ProtoCMDArgs.h"

namespace paddle {

Expand Down Expand Up @@ -64,12 +64,12 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
batchSize,
codeLength_,
/* trans */ false,
useGpu(deviceId_));
useGPU(config_, deviceId_));
Matrix::resizeOrCreate(preOutput_.grad,
batchSize,
codeLength_,
/* trans */ false,
useGpu(deviceId_));
useGPU(config_, deviceId_));

IVectorPtr label = getInput(*getLabelLayer()).ids;

Expand All @@ -91,8 +91,8 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
*output_.value,
-1); // scaleSum
preOutput_.value->softrelu(*preOutput_.value);
MatrixPtr sum =
Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_));
MatrixPtr sum = Matrix::create(
batchSize, 1, /* trans= */ false, useGPU(config_, deviceId_));
preOutput_.value->rowSum(*sum);
output_.value->add(*sum);
}