From 3f494b442ee3f9d17a07b09ecbd5fa2bbda00836 Mon Sep 17 00:00:00 2001 From: Daisy Deng Date: Wed, 22 May 2019 21:38:33 +0800 Subject: [PATCH] Revert fix the bug of SSD NMS Noise raised by community into master_clean --- examples/faster-rcnn/tools/test_net.py | 17 ++++++++++++++++ examples/rfcn/tools/test_net.py | 17 ++++++++++++++++ scripts/calibrator.py | 17 +++++++++++++--- .../layers/mkldnn_inner_product_layer.cpp | 2 +- src/caffe/util/bbox_util.cpp | 20 +++++-------------- 5 files changed, 54 insertions(+), 19 deletions(-) diff --git a/examples/faster-rcnn/tools/test_net.py b/examples/faster-rcnn/tools/test_net.py index 1cb3cf3c0..6bdaa4382 100755 --- a/examples/faster-rcnn/tools/test_net.py +++ b/examples/faster-rcnn/tools/test_net.py @@ -77,8 +77,19 @@ def parse_args(): parser.add_argument('-wi', '--conv_algo', dest='conv_algo', action="store_true", default=False, help='to choose the convolution algorithm') + parser.add_argument('-1st', '--enable_1st_conv_layer', dest='enable_1st_conv_layer', action="store_true", default=False, help='enable 1st conv layer') + + parser.add_argument('-fc', '--fc_int8', dest='fc_int8', action="store_true", default=False, + help='enable int8 fc layer') + + parser.add_argument('-uff', '--disable_force_fp32', dest='disable_force_fp32', action="store_true", default=False, + help='to disable force fp32 output in conv/fc + fp32') + + parser.add_argument('-ucac', '--disable_cac_unify', dest='disable_cac_unify', action="store_true", default=False, + help='to disable scale unify in conv/fc + avg pooling + conv/fc') + if len(sys.argv) == 1: parser.print_help() sys.exit(1) @@ -119,6 +130,8 @@ def parse_args(): if args.quantized_prototxt == None: test_net(net, imdb, max_per_image=args.max_per_image, vis=args.vis) else: + if args.fc_int8: + calibrator.enable_fc_int8() (blobs, params, top_blobs_map, bottom_blobs_map, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, winograd_convolutions) = sample_net(args.prototxt, net, imdb, args.sample_iters, args.quant_mode, args.enable_1st_conv_layer) (inputs_max, outputs_max, inputs_min) = sampling.calibrate_activations(blobs, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, args.calibration_algos, "SINGLE", args.conv_algo) @@ -130,3 +143,7 @@ def parse_args(): with open(compile_net_path, "w") as f: f.write(compiled_net_str) calibrator.transform_convolutions(args.quantized_prototxt, compile_net_path, top_blobs_map, bottom_blobs_map, args.unsigned_range, args.concat_use_fp32, args.unify_concat_scales, args.conv_algo, args.enable_1st_conv_layer) + if not args.disable_force_fp32: + calibrator.force_fp32_opt(args.quantized_prototxt) + if not args.disable_cac_unify: + calibrator.cac_opt(args.quantized_prototxt) diff --git a/examples/rfcn/tools/test_net.py b/examples/rfcn/tools/test_net.py index d87a7a327..38230a4c3 100755 --- a/examples/rfcn/tools/test_net.py +++ b/examples/rfcn/tools/test_net.py @@ -82,8 +82,19 @@ def parse_args(): parser.add_argument('-wi', '--conv_algo', dest='conv_algo', action="store_true", default=False, help='to choose the convolution algorithm') + parser.add_argument('-1st', '--enable_1st_conv_layer', dest='enable_1st_conv_layer', action="store_true", default=False, help='enable 1st conv layer') + + parser.add_argument('-fc', '--fc_int8', dest='fc_int8', action="store_true", default=False, + help='enable int8 fc layer') + + parser.add_argument('-uff', '--disable_force_fp32', dest='disable_force_fp32', action="store_true", default=False, + help='to disable force fp32 output in conv/fc + fp32') + + parser.add_argument('-ucac', '--disable_cac_unify', dest='disable_cac_unify', action="store_true", default=False, + help='to disable scale unify in conv/fc + avg pooling + conv/fc') + if len(sys.argv) == 1: parser.print_help() sys.exit(1) @@ -126,6 +137,8 @@ def parse_args(): if args.quantized_prototxt == None: test_net(net, imdb, max_per_image=args.max_per_image, vis=args.vis) else: + if args.fc_int8: + calibrator.enable_fc_int8() (blobs, params, top_blobs_map, bottom_blobs_map, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, winograd_convolutions) = sample_net(args.prototxt, net, imdb, args.sample_iters, args.quant_mode, args.enable_1st_conv_layer) (inputs_max, outputs_max, inputs_min) = sampling.calibrate_activations(blobs, conv_top_blob_layer_map, conv_bottom_blob_layer_map, winograd_bottoms, args.calibration_algos, "SINGLE", args.conv_algo) @@ -137,3 +150,7 @@ def parse_args(): with open(compile_net_path, "w") as f: f.write(compiled_net_str) calibrator.transform_convolutions(args.quantized_prototxt, compile_net_path, top_blobs_map, bottom_blobs_map, args.unsigned_range, args.concat_use_fp32, args.unify_concat_scales, args.conv_algo, args.enable_1st_conv_layer) + if not args.disable_force_fp32: + calibrator.force_fp32_opt(args.quantized_prototxt) + if not args.disable_cac_unify: + calibrator.cac_opt(args.quantized_prototxt) diff --git a/scripts/calibrator.py b/scripts/calibrator.py index 57c89a627..bbaa9718f 100644 --- a/scripts/calibrator.py +++ b/scripts/calibrator.py @@ -471,7 +471,11 @@ def force_fp32_opt(quantized_prototxt): if base_net.layer[index].top[0] in layer_bottom_name_map.keys(): bottom_layer_indexes=layer_bottom_name_map[base_net.layer[index].top[0]] for bottom_layer_index in bottom_layer_indexes: - if base_net.layer[bottom_layer_index].type in int8_layers: + next_layer = base_net.layer[bottom_layer_index] + if next_layer.top == next_layer.bottom and next_layer.type not in int8_layers: + force_fp32 = True + break + if next_layer.type in int8_layers: force_fp32 = False if force_fp32 or index == np.max(quantize_layers_indexes): new_net_index=find_index_by_name(base_net.layer[index].name, layer_infos) @@ -565,6 +569,14 @@ def cac_opt(quantized_prototxt): f.write(str(new_net)) print('cac opt done') +def enable_fc_int8(): + local_q = quantize_layers + ["InnerProduct"] + local_i = int8_layers + ["InnerProduct"] + global quantize_layers + global int8_layers + quantize_layers = local_q + int8_layers = local_i + if __name__ == '__main__': usage_string = 'Usage: 1.Build the caffe\n ' \ @@ -712,8 +724,7 @@ def cac_opt(quantized_prototxt): user_conv_algo = params.conv_algo if params.fc_int8: - quantize_layers.append("InnerProduct") - int8_layers.append("InnerProduct") + enable_fc_int8() try: toleration = float(params.loss) diff --git a/src/caffe/layers/mkldnn_inner_product_layer.cpp b/src/caffe/layers/mkldnn_inner_product_layer.cpp index 651da5aa7..139ac4dc3 100644 --- a/src/caffe/layers/mkldnn_inner_product_layer.cpp +++ b/src/caffe/layers/mkldnn_inner_product_layer.cpp @@ -303,7 +303,7 @@ void MKLDNNInnerProductLayer::InitInnerProductFwd(const vector scale_top(1); scale_top[0] = 1.0f; if(this->need_quantize_) scale_top = this->scale_out_; - fwd_top_data.reset(new MKLDNNData(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this)); + fwd_top_data.reset(new MKLDNNData(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this, scale_top)); fwd_top_data ->name = "fwd_top_data @ " + this->layer_param_.name(); fwd_top_data_memory = fwd_top_data->create_output_memory(); diff --git a/src/caffe/util/bbox_util.cpp b/src/caffe/util/bbox_util.cpp index 886813d33..958232173 100644 --- a/src/caffe/util/bbox_util.cpp +++ b/src/caffe/util/bbox_util.cpp @@ -2267,22 +2267,12 @@ void GetMaxScoreIndex(const vector& scores, const float threshold, const int top_k, vector >* score_index_vec) { // Generate index score pairs. #ifdef _OPENMP - #pragma omp parallel -#endif - { - vector > prv; -#ifdef _OPENMP - #pragma omp for nowait + #pragma omp parallel for #endif - for (int i = 0; i < scores.size(); ++i) { - if (scores[i] > threshold) { - prv.push_back(std::make_pair(scores[i], i)); - } + for (int i = 0; i < scores.size(); ++i) { + if (scores[i] > threshold) { + score_index_vec->at(i) = std::make_pair(scores[i], i); } -#ifdef _OPENMP - #pragma omp critical -#endif - score_index_vec->insert(score_index_vec->end(), prv.begin(), prv.end()); } // Sort the score pair according to the scores in descending order @@ -2442,7 +2432,7 @@ void ApplyNMSFast(const vector& bboxes, CHECK_EQ(bboxes.size(), scores.size()) << "bboxes and scores have different size."; // Get top_k scores (with corresponding indices). - vector > score_index_vec; + vector > score_index_vec(scores.size()); GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec); // Do nms. float adaptive_threshold = nms_threshold;