From 3f494b442ee3f9d17a07b09ecbd5fa2bbda00836 Mon Sep 17 00:00:00 2001
From: Daisy Deng <daisy.deng@intel.com>
Date: Wed, 22 May 2019 21:38:33 +0800
Subject: [PATCH] Revert the fix for the community-reported SSD NMS noise bug
 into master_clean

---
 examples/faster-rcnn/tools/test_net.py        | 17 ++++++++++++++++
 examples/rfcn/tools/test_net.py               | 17 ++++++++++++++++
 scripts/calibrator.py                         | 17 +++++++++++++---
 .../layers/mkldnn_inner_product_layer.cpp     |  2 +-
 src/caffe/util/bbox_util.cpp                  | 20 +++++--------------
 5 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/examples/faster-rcnn/tools/test_net.py b/examples/faster-rcnn/tools/test_net.py
index 1cb3cf3c0..6bdaa4382 100755
--- a/examples/faster-rcnn/tools/test_net.py
+++ b/examples/faster-rcnn/tools/test_net.py
@@ -77,8 +77,19 @@ def parse_args():
 
     parser.add_argument('-wi', '--conv_algo', dest='conv_algo', action="store_true", default=False,
                         help='to choose the convolution algorithm')
+
     parser.add_argument('-1st', '--enable_1st_conv_layer', dest='enable_1st_conv_layer', action="store_true", default=False,
                         help='enable 1st conv layer')
+
+    parser.add_argument('-fc', '--fc_int8', dest='fc_int8', action="store_true", default=False,
+                        help='enable int8 fc layer')
+
+    parser.add_argument('-uff', '--disable_force_fp32', dest='disable_force_fp32', action="store_true", default=False,
+                        help='to disable force fp32 output in conv/fc + fp32')
+
+    parser.add_argument('-ucac', '--disable_cac_unify', dest='disable_cac_unify', action="store_true", default=False,
+                        help='to disable scale unify in conv/fc + avg pooling + conv/fc')
+
     if len(sys.argv) == 1:
         parser.print_help()
         sys.exit(1)
@@ -119,6 +130,8 @@ def parse_args():
     if args.quantized_prototxt == None:
 	test_net(net, imdb, max_per_image=args.max_per_image, vis=args.vis)
     else:
+        if args.fc_int8:
+            calibrator.enable_fc_int8()
         (blobs, params, top_blobs_map, bottom_blobs_map, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, winograd_convolutions) = sample_net(args.prototxt, net, imdb, args.sample_iters, args.quant_mode, args.enable_1st_conv_layer)
 
         (inputs_max, outputs_max, inputs_min) = sampling.calibrate_activations(blobs, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, args.calibration_algos, "SINGLE", args.conv_algo)
@@ -130,3 +143,7 @@ def parse_args():
         with open(compile_net_path, "w") as f:
             f.write(compiled_net_str)
         calibrator.transform_convolutions(args.quantized_prototxt, compile_net_path, top_blobs_map, bottom_blobs_map, args.unsigned_range, args.concat_use_fp32, args.unify_concat_scales, args.conv_algo, args.enable_1st_conv_layer)
+        if not args.disable_force_fp32:
+            calibrator.force_fp32_opt(args.quantized_prototxt)
+        if not args.disable_cac_unify:
+            calibrator.cac_opt(args.quantized_prototxt)
diff --git a/examples/rfcn/tools/test_net.py b/examples/rfcn/tools/test_net.py
index d87a7a327..38230a4c3 100755
--- a/examples/rfcn/tools/test_net.py
+++ b/examples/rfcn/tools/test_net.py
@@ -82,8 +82,19 @@ def parse_args():
 
     parser.add_argument('-wi', '--conv_algo', dest='conv_algo', action="store_true", default=False,
                         help='to choose the convolution algorithm')
+
     parser.add_argument('-1st', '--enable_1st_conv_layer', dest='enable_1st_conv_layer', action="store_true", default=False,
                         help='enable 1st conv layer')
+
+    parser.add_argument('-fc', '--fc_int8', dest='fc_int8', action="store_true", default=False,
+                        help='enable int8 fc layer')
+
+    parser.add_argument('-uff', '--disable_force_fp32', dest='disable_force_fp32', action="store_true", default=False,
+                        help='to disable force fp32 output in conv/fc + fp32')
+
+    parser.add_argument('-ucac', '--disable_cac_unify', dest='disable_cac_unify', action="store_true", default=False,
+                        help='to disable scale unify in conv/fc + avg pooling + conv/fc')
+
     if len(sys.argv) == 1:
         parser.print_help()
         sys.exit(1)
@@ -126,6 +137,8 @@ def parse_args():
     if args.quantized_prototxt == None:
 	test_net(net, imdb, max_per_image=args.max_per_image, vis=args.vis)
     else:
+        if args.fc_int8:
+            calibrator.enable_fc_int8()
         (blobs, params, top_blobs_map, bottom_blobs_map, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, winograd_convolutions) = sample_net(args.prototxt, net, imdb, args.sample_iters, args.quant_mode, args.enable_1st_conv_layer)
         
         (inputs_max, outputs_max, inputs_min) = sampling.calibrate_activations(blobs, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, args.calibration_algos, "SINGLE", args.conv_algo)
@@ -137,3 +150,7 @@ def parse_args():
         with open(compile_net_path, "w") as f:
             f.write(compiled_net_str)
         calibrator.transform_convolutions(args.quantized_prototxt, compile_net_path, top_blobs_map, bottom_blobs_map, args.unsigned_range, args.concat_use_fp32, args.unify_concat_scales, args.conv_algo, args.enable_1st_conv_layer)
+        if not args.disable_force_fp32:
+            calibrator.force_fp32_opt(args.quantized_prototxt)
+        if not args.disable_cac_unify:
+            calibrator.cac_opt(args.quantized_prototxt)
diff --git a/scripts/calibrator.py b/scripts/calibrator.py
index 57c89a627..bbaa9718f 100644
--- a/scripts/calibrator.py
+++ b/scripts/calibrator.py
@@ -471,7 +471,11 @@ def force_fp32_opt(quantized_prototxt):
             if base_net.layer[index].top[0] in layer_bottom_name_map.keys():
                 bottom_layer_indexes=layer_bottom_name_map[base_net.layer[index].top[0]]
                 for bottom_layer_index in bottom_layer_indexes:
-                    if base_net.layer[bottom_layer_index].type in int8_layers:
+                    next_layer = base_net.layer[bottom_layer_index]
+                    if next_layer.top == next_layer.bottom and next_layer.type not in int8_layers:
+                        force_fp32 = True
+                        break
+                    if next_layer.type in int8_layers:
                         force_fp32 = False
             if force_fp32 or index == np.max(quantize_layers_indexes):
                 new_net_index=find_index_by_name(base_net.layer[index].name, layer_infos)
@@ -565,6 +569,14 @@ def cac_opt(quantized_prototxt):
          f.write(str(new_net))
     print('cac opt done')
 
+def enable_fc_int8():
+    """Add "InnerProduct" to the module-level quantize_layers and int8_layers (rebinds both to new lists)."""
+    # 'global' must precede any use of the name: SyntaxError on Python 3, SyntaxWarning on Python 2.
+    global quantize_layers
+    global int8_layers
+    quantize_layers = quantize_layers + ["InnerProduct"]
+    int8_layers = int8_layers + ["InnerProduct"]
+
 
 if __name__ == '__main__':
     usage_string = 'Usage: 1.Build the caffe\n ' \
@@ -712,8 +724,7 @@ def cac_opt(quantized_prototxt):
         user_conv_algo = params.conv_algo 
 
     if params.fc_int8:
-        quantize_layers.append("InnerProduct")
-        int8_layers.append("InnerProduct")
+        enable_fc_int8()
 
     try:
         toleration = float(params.loss)
diff --git a/src/caffe/layers/mkldnn_inner_product_layer.cpp b/src/caffe/layers/mkldnn_inner_product_layer.cpp
index 651da5aa7..139ac4dc3 100644
--- a/src/caffe/layers/mkldnn_inner_product_layer.cpp
+++ b/src/caffe/layers/mkldnn_inner_product_layer.cpp
@@ -303,7 +303,7 @@ void MKLDNNInnerProductLayer<Dtype>::InitInnerProductFwd(const vector<Blob<Dtype
     std::vector<float> scale_top(1);
     scale_top[0] = 1.0f;
     if(this->need_quantize_) scale_top = this->scale_out_;
-    fwd_top_data.reset(new MKLDNNData<Dtype>(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this));
+    fwd_top_data.reset(new MKLDNNData<Dtype>(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this, scale_top));
     fwd_top_data    ->name = "fwd_top_data      @ " + this->layer_param_.name();
     fwd_top_data_memory = fwd_top_data->create_output_memory();
 
diff --git a/src/caffe/util/bbox_util.cpp b/src/caffe/util/bbox_util.cpp
index 886813d33..958232173 100644
--- a/src/caffe/util/bbox_util.cpp
+++ b/src/caffe/util/bbox_util.cpp
@@ -2267,22 +2267,12 @@ void GetMaxScoreIndex(const vector<float>& scores, const float threshold,
       const int top_k, vector<pair<float, int> >* score_index_vec) {
   // Generate index score pairs.
 #ifdef _OPENMP
-  #pragma omp parallel
-#endif
-  {
-    vector<pair<float, int> > prv;
-#ifdef _OPENMP
-    #pragma omp for nowait
+  #pragma omp parallel for
 #endif
-    for (int i = 0; i < scores.size(); ++i) {
-      if (scores[i] > threshold) {
-        prv.push_back(std::make_pair(scores[i], i));
-      }
+  for (int i = 0; i < scores.size(); ++i) {
+    if (scores[i] > threshold) {
+      score_index_vec->at(i) = std::make_pair(scores[i], i);
     }
-#ifdef _OPENMP
-    #pragma omp critical
-#endif
-    score_index_vec->insert(score_index_vec->end(), prv.begin(), prv.end());
   }
 
   // Sort the score pair according to the scores in descending order
@@ -2442,7 +2432,7 @@ void ApplyNMSFast(const vector<NormalizedBBox>& bboxes,
   CHECK_EQ(bboxes.size(), scores.size())
       << "bboxes and scores have different size.";
   // Get top_k scores (with corresponding indices).
-  vector<pair<float, int> > score_index_vec;
+  vector<pair<float, int> > score_index_vec(scores.size());
   GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec);
   // Do nms.
   float adaptive_threshold = nms_threshold;