merge release_1.0.6
daisyden committed Nov 13, 2017
2 parents 16f8c2b + f77f7e0 commit c7ed327
Showing 53 changed files with 19,621 additions and 5,785 deletions.
5 changes: 5 additions & 0 deletions Makefile
@@ -64,6 +64,11 @@ endif
#################### MLSL ####################

ifeq ($(USE_MLSL), 1)

ifeq ($(CPU_ONLY), 0)
$(error Multi-node is not supported if CPU_ONLY is disabled. Please set CPU_ONLY=1 if USE_MLSL=1)
endif

RETURN_STRING=$(shell ./external/mlsl/prepare_mlsl.sh)
MLSL_ROOT=$(firstword $(RETURN_STRING))
MLSL_LDFLAGS=$(lastword $(RETURN_STRING))
4 changes: 4 additions & 0 deletions cmake/Dependencies.cmake
@@ -96,6 +96,10 @@ endif()

# ---[ MLSL
if(USE_MLSL)
if (NOT CPU_ONLY)
message(FATAL_ERROR "Multi-node is not supported if CPU_ONLY is disabled. Please set CPU_ONLY=1 if USE_MLSL=1.")
endif()

#--find mlsl in external/mkl
set(script_cmd "./external/mlsl/prepare_mlsl.sh" )
execute_process(COMMAND ${script_cmd}
10 changes: 10 additions & 0 deletions data/Celeb-A/celebA.txt
@@ -0,0 +1,10 @@
/Celeb-A_Cropped/000001.jpg 1
/Celeb-A_Cropped/000002.jpg 1
/Celeb-A_Cropped/000003.jpg 1
/Celeb-A_Cropped/000004.jpg 1
/Celeb-A_Cropped/000005.jpg 1
/Celeb-A_Cropped/000006.jpg 1
/Celeb-A_Cropped/000007.jpg 1
/Celeb-A_Cropped/000008.jpg 1
/Celeb-A_Cropped/000009.jpg 1
/Celeb-A_Cropped/000010.jpg 1
59 changes: 59 additions & 0 deletions data/Celeb-A/crop_celebA.py
@@ -0,0 +1,59 @@
from PIL import Image
import os
import sys

print ""
print "Prepare Celeb-A Dataset! (1. Crop the images. 2. Generate a train list file.)"
print ""
print "-------------------------------------------------------------------------------"

current_path = os.getcwd()
celebA_path = ""
celebA_cropped_path = ""
print "The current path containing this python file is: " + current_path
if len(sys.argv) == 1:
    print "Please give the path of the original Celeb-A dataset!"
    exit(0)
elif len(sys.argv) > 1:
    print "The path of the original Celeb-A dataset is: " + str(sys.argv[1])
    celebA_path = sys.argv[1]
    celebA_cropped_path = os.path.dirname(celebA_path) + os.sep + "Cropped" # Avoids cropping the generated images again if this parameter is not provided
    if len(sys.argv) > 2:
        print "The path of the cropped Celeb-A dataset will be: " + str(sys.argv[2])
        celebA_cropped_path = sys.argv[2]
    else:
        print "The path of the cropped Celeb-A dataset defaults to: " + celebA_cropped_path

if os.path.exists(celebA_cropped_path):
    print "The path of the cropped Celeb-A dataset exists."
else:
    print "The path of the cropped Celeb-A dataset doesn't exist! It will be created now!"
    os.makedirs(celebA_cropped_path)
print "-------------------------------------------------------------------------------"

training_list_file = os.path.join(celebA_cropped_path, "celebA.txt")
list_file = open(training_list_file, 'w')
total_image_num = 0
x1, y1 = 30, 40
cropped_box = (x1, y1, x1 + 138, y1 + 138) # 138x138 crop with its top-left corner at (30, 40)

for parent, dirnames, filenames in os.walk(celebA_path):
    for filename in filenames:
        if filename.endswith(".jpg"):
            total_image_num += 1
            #print "parent is:" + parent
            #print "filename is:" + filename
            image_path_and_name = os.path.join(parent, filename)
            print "The full name of the file is: " + image_path_and_name
            input_image = Image.open(image_path_and_name)
            #input_image.show()
            cropped_image = input_image.crop(cropped_box)
            #cropped_image.show()
            scaled_cropped_image = cropped_image.resize((64, 64))
            #scaled_cropped_image.show()
            save_result_image_path_and_name = os.path.join(celebA_cropped_path, filename)
            scaled_cropped_image.save(save_result_image_path_and_name, 'jpeg')
            list_file.writelines(save_result_image_path_and_name)
            list_file.writelines(" 1" + "\n") # Must add a label to the list file
print str(total_image_num) + " images have been cropped and scaled!"
list_file.close()
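
Not part of the commit: as a quick way to check what the script produces, a minimal sanity-check sketch that re-reads the generated celebA.txt and confirms each listed image was cropped and scaled to 64x64. The list-file path below is a hypothetical example; substitute the Cropped directory the script printed.

from PIL import Image

# Hypothetical output location; crop_celebA.py writes celebA.txt into the
# cropped-dataset directory it reports on stdout.
list_file_path = "/data/Celeb-A/Cropped/celebA.txt"

with open(list_file_path) as list_file:
    for line in list_file:
        # Each line is "<image path> <label>", e.g. "/Celeb-A_Cropped/000001.jpg 1".
        image_path, label = line.strip().rsplit(" ", 1)
        size = Image.open(image_path).size
        assert size == (64, 64), "unexpected size %s for %s" % (size, image_path)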
5 changes: 4 additions & 1 deletion docker/standalone/cpu-centos/Dockerfile
@@ -4,7 +4,9 @@ MAINTAINER [email protected]
#ENV http_proxy proxy:port
#ENV https_proxy proxy:port

-RUN rpm -iUvh http://download.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-8.noarch.rpm
+RUN rpm -iUvh http://download.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-10.noarch.rpm

RUN yum upgrade -y

RUN yum install -y \
redhat-rpm-config \
@@ -15,6 +17,7 @@ RUN yum install -y \
cmake \
git \
wget \
ssh \
atlas-devel \
boost-devel \
gflags-devel \
3 changes: 2 additions & 1 deletion docker/standalone/cpu-ubuntu/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:14.04
+FROM ubuntu:16.04
MAINTAINER [email protected]

#ENV http_proxy proxy:port
@@ -9,6 +9,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
git \
wget \
ssh \
libboost-all-dev \
libgflags-dev \
libgoogle-glog-dev \
1 change: 1 addition & 0 deletions docker/templates/Dockerfile.template
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
git \
wget \
ssh \
libatlas-base-dev \
libboost-all-dev \
libgflags-dev \
22 changes: 11 additions & 11 deletions include/caffe/layers/mkldnn_layers.hpp
@@ -69,7 +69,7 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer<Dtype>, public Layer<Dtype> {
, bwd_top_diff(), bwd_bottom_diff()
, BatchNormFwd_pd(), BatchNormBwd_pd()
, scaleshift_memory(), bwd_scaleshift_diff_memory()
-, output_memory(), bwd_bottom_diff_memory(), inplace_buffer_memory()
+, output_memory(), bwd_bottom_diff_memory()
, input_primitive(), bwd_top_diff_primitive()
{
PERFORMANCE_EVENT_ID_RESET(perf_id_fw_);
@@ -95,12 +95,10 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer<Dtype>, public Layer<Dtype> {
void InitBatchNormBwd(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom);
-void InitBatchNormFwdPrimitive(int stats_batch_idx, bool inplace);
-void InitBatchNormBwdPrimitive(int stats_batch_idx, bool inplace);
+void InitBatchNormFwdPrimitive(int stats_batch_idx);
+void InitBatchNormBwdPrimitive(int stats_batch_idx);
template <bool diff> shared_ptr<memory> GetStatsBatchMemory(
shared_ptr<MKLDNNMemoryDescriptor<Dtype, diff> > mkldnn_data, int idx);
-template <bool diff> shared_ptr<memory> GetStatsBatchMemoryInplace(
-shared_ptr<MKLDNNMemoryDescriptor<Dtype, diff> > mkldnn_data, int idx, shared_ptr<memory > buffer_memory);
void InitStatsBatchVars(int batch_size);
shared_ptr<MKLDNNData<Dtype> > fwd_top_data, fwd_bottom_data;
shared_ptr<MKLDNNDiff<Dtype> > bwd_top_diff, bwd_bottom_diff;
@@ -112,8 +110,8 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer<Dtype>, public Layer<Dtype> {

shared_ptr<memory> scaleshift_memory, bwd_scaleshift_diff_memory;
shared_ptr<memory> output_memory, bwd_bottom_diff_memory;
-shared_ptr<memory> inplace_buffer_memory;
-vector<shared_ptr<memory> > input_stats, output_stats, top_diff_stats, bottom_diff_stats, input_inplace_buffer;

+vector<shared_ptr<memory> > input_stats, output_stats, top_diff_stats, bottom_diff_stats;

shared_ptr<primitive> input_primitive, bwd_top_diff_primitive;

@@ -124,6 +122,7 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer<Dtype>, public Layer<Dtype> {
int stats_batch_size_;
shared_ptr<Blob<Dtype> > scaleshift_blob_;
shared_ptr<Blob<Dtype> > scaleshift_acc_;
Blob<Dtype> inplace_buffer;

PERFORMANCE_EVENT_ID_DECL(perf_id_fw_);
PERFORMANCE_EVENT_ID_DECL(perf_id_bw_);
@@ -224,7 +223,7 @@ class MKLDNNInnerProductLayer : public MKLDNNLayer<Dtype> , public InnerProductL
, bwdd_top_diff_primitive, bwdd_weights_data_primitive
, bwdw_top_diff_primitive, bwdw_bottom_data_primitive;
int32_t w_, h_;

/* In case of (iter_size > 1) we need additional buffers */
shared_ptr<MKLDNNDiff<Dtype> > bwdw_weights_diff_iter, bwdw_bias_diff_iter;
shared_ptr<memory> bwdw_weights_diff_memory_iter, bwdw_bias_diff_memory_iter;
@@ -322,13 +321,14 @@ class MKLDNNPoolingLayer : public MKLDNNLayer<Dtype>, public Layer<Dtype> {
,const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down
,const vector<Blob<Dtype>*>& bottom);
virtual void compute_output_shape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);

private:
void InitPoolingFwd(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
void InitPoolingBwd(const vector<Blob<Dtype>*>& bottom
, const vector<bool>& propagate_down
, const vector<Blob<Dtype>*>& top);

shared_ptr<MKLDNNData<Dtype>> fwd_bottom_data, fwd_top_data;
shared_ptr<MKLDNNDiff<Dtype>> bwd_top_diff, bwd_bottom_diff;
shared_ptr<pooling_forward::primitive_desc> poolingFwd_pd;
@@ -408,7 +408,7 @@ class MKLDNNConcatLayer : public MKLDNNLayer<Dtype> , public Layer<Dtype> {
: MKLDNNLayer<Dtype>(), Layer<Dtype>(param),
concatFwd_pd(), fwd_output_memory(),
bwd_reorder_input_memory(), bwd_reorder_output_memory(),
-fwd_top_data(), fwd_bottom_data(), split_channels() {
+fwd_top_data(), fwd_bottom_data(), split_dims() {
PERFORMANCE_EVENT_ID_RESET(perf_id_fw_);
PERFORMANCE_EVENT_ID_RESET(perf_id_bw_);
}
@@ -440,7 +440,7 @@ class MKLDNNConcatLayer : public MKLDNNLayer<Dtype> , public Layer<Dtype> {
shared_ptr<MKLDNNDiff<Dtype> > bwd_top_diff;
vector<shared_ptr<MKLDNNDiff<Dtype> > > bwd_bottom_diff;
vector<MKLDNNPrimitive<Dtype> > reorders;
-vector<int> split_channels;
+vector<int> split_dims;

int32_t num_, width_, height_, channels_, num_concats_;
int concat_dimension;
2 changes: 2 additions & 0 deletions include/caffe/mkldnn_base.hpp
@@ -196,6 +196,8 @@ class MKLDNNLayer {
public:
explicit MKLDNNLayer() {}
virtual ~MKLDNNLayer() {}
protected:
bool reshape;
};

// ===== MKLDNNPrimitive =======================================
3 changes: 3 additions & 0 deletions include/caffe/net.hpp
@@ -326,6 +326,9 @@ class Net {
/// @brief return whether NetState state meets NetStateRule rule
static bool StateMeetsRule(const NetState& state, const NetStateRule& rule,
const string& layer_name);
inline const map<string,int>& blob_names_index() const {
return blob_names_index_;
}

protected:
// Helpers for Init.
1 change: 1 addition & 0 deletions include/caffe/solver.hpp
@@ -113,6 +113,7 @@ class Solver {
}
int iter() { return iter_; }
void set_iter(int value) { iter_ = value; }
void increment_iter() { iter_++; }

// Invoked at specific points during an iteration
class Callback {
1 change: 1 addition & 0 deletions include/caffe/syncedmem.hpp
@@ -151,6 +151,7 @@ class SyncedMemory {
cpu_malloc_use_cuda_(false), own_gpu_data_(false), own_prv_data_(false),
gpu_device_(-1) {}
~SyncedMemory();
void swap(shared_ptr<SyncedMemory> other);
const void* cpu_data();
void set_cpu_data(void* data);
const void* gpu_data();
1 change: 1 addition & 0 deletions include/caffe/test/test_caffe_main.hpp
@@ -91,6 +91,7 @@ class CPUDeviceTest : public MultiDeviceTest<CPUDevice<Dtype> > {

typedef ::testing::Types<CPUDevice<float>,
CPUDevice<double> > TestDtypesAndDevices;
typedef ::testing::Types<CPUDevice<float>> MKLDNNTestDtypesAndDevices;

#else

2 changes: 1 addition & 1 deletion mkldnn.commit
@@ -1 +1 @@
-472bbbf05ce5ff5c072811220c55cf9b5bbd96ad
+ba482eca9459e3b9a8256ab07f9afa41dba34b9e
@@ -0,0 +1,25 @@
#This is the Intel(R) optimized (in terms of time to train) version of the solver for the model described in the [AlexNet](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) publication.
#The original solver.prototxt can be found in the /models/bvlc_alexnet/ directory of this repository.
#Differences:
#- lr_policy is set to poly instead of step
#- base_lr is decreased to 0.007
#- max_iter is decreased to 250000
#- power is set to 0.6
#
#Top-5 and Top-1 results achieved with this version of the solver:
#Top-5: 80.4%
#Top-1: 57.4%
#Training was performed on a server equipped with an Intel(R) Xeon Phi(TM) CPU 7250 processor.
net: "models/intel_optimized_models/alexnet/knm/train_val_dummydata.prototxt"
test_iter: 1000
test_interval: 10000
base_lr: 0.007
lr_policy: "poly"
power: 0.6
display: 1
max_iter: 5000
momentum: 0.9
weight_decay: 0.0005
snapshot: 50000
snapshot_prefix: "models/intel_optimized_models/alexnet/knm/alexnet_train"
solver_mode: CPU
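
For reference (not part of the commit): Caffe's "poly" policy decays the learning rate as base_lr * (1 - iter/max_iter)^power. A minimal sketch using the values from this solver file:

base_lr, power, max_iter = 0.007, 0.6, 5000

def poly_lr(iteration):
    # Caffe "poly" policy: lr = base_lr * (1 - iter/max_iter)^power
    return base_lr * (1.0 - float(iteration) / max_iter) ** power

for it in (0, 2500, 5000):
    print("iter %5d -> lr %.6f" % (it, poly_lr(it)))
# iter     0 -> lr 0.007000
# iter  2500 -> lr 0.004619
# iter  5000 -> lr 0.000000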