Merge remote-tracking branch 'internal/release_1.0.2a'

intel · Aug 4, 2017 · 8012927 · 8012927
2 parents 3bbc9f1 + 0da457c
commit 8012927
Show file tree

Hide file tree

Showing 26 changed files with 3,790 additions and 402 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -29,7 +29,7 @@ include(cmake/ConfigGen.cmake)
 caffe_option(CPU_ONLY  "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA
 caffe_option(USE_OPENMP "Build Caffe with OpenMP support" ON )
 caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY)
-caffe_option(USE_MKL2017_AS_DEFAULT_ENGINE "Use MKL2017 primitives for supported layers" OFF)
+caffe_option(USE_MKL2017_AS_DEFAULT_ENGINE "Use MKL2017 primitives for supported layers" ON)
 caffe_option(USE_MKLDNN_AS_DEFAULT_ENGINE "Use MKL-DNN primitives for supported layers" OFF)
 caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
 caffe_option(BUILD_python "Build Python wrapper" ON)

diff --git a/Makefile.config.example b/Makefile.config.example
@@ -43,8 +43,8 @@
 # CPU-only switch (uncomment to build without GPU support).
 CPU_ONLY := 1
 
-# USE_MKL2017_AS_DEFAULT_ENGINE flag is OBSOLETE
-# Put this at the top your train_val.protoxt or solver.prototxt file:
+USE_MKL2017_AS_DEFAULT_ENGINE := 1
+# or put this at the top of your train_val.prototxt or solver.prototxt file:
 # engine: "MKL2017" 
 # or use this option with caffe tool:
 # -engine "MKL2017"

diff --git a/docs/release_notes.md b/docs/release_notes.md
@@ -126,8 +126,6 @@ This Caffe version is seflcontained. This means that newest version of Intel MKL
 * Set layer engine to `MKL2017` in prototxt file (model). Only this specific layer will be accelerated with new primitives.
 * Use -engine = MKL2017 in command line as an option during execution of caffe (training, scoring, benchmark)
 
-Comment: there is obsolete method to compale with `USE_MKL2017_AS_DEFAULT_ENGINE := 1` in `Makefile.config`. This is obsolete solution - not recommended to use anymore.
-
 ### Building for GPU
 Caffe requires the CUDA `nvcc` compiler to compile its GPU code and CUDA driver for GPU operation.
 To install CUDA, go to the [NVIDIA CUDA website](https://developer.nvidia.com/cuda-downloads) and follow installation instructions there. Install the library and the latest standalone driver separately; the driver bundled with the library is usually out-of-date. **Warning!** The 331.* CUDA driver series has a critical performance issue: do not use it.

diff --git a/examples/pycaffe/tune_engine.py b/examples/pycaffe/tune_engine.py
@@ -1,120 +1,9 @@
 import os
 import sys
-import copy
 import argparse
-
 from caffe.proto import caffe_pb2
 import google.protobuf.text_format as txtf
-
-def readFile(filePath):
-    lines = []
-    file = open(filePath, 'r')
-    for line in file.readlines():
-        lines.append(line)
-    file.close()
-
-    return lines
-
-def writeFile(filePath, lines):
-    file = open(filePath, 'w+')
-    file.write(lines)
-    file.close()
-
-def parseLog(log):
-    lines = readFile(log)
-    model_start = False
-    time_start = False
-    model_lines = []
-    time_lines = []
-    for line in lines:
-        trim_line = line.strip()
-        if trim_line.endswith("Initializing net from parameters:"):
-            model_start = True
-            continue
-        if model_start:
-            if trim_line.find("Creating layer") <> -1:
-                model_start = False
-                continue
-            model_lines.append(line)
-
-        if trim_line.endswith("Average time per layer:"):
-            time_start = True
-            continue
-        if time_start:
-            if trim_line.find("Average Forward pass") <> -1:
-                time_start = False
-                break
-            time_lines.append(line)
-
-    model_lines = model_lines[1:]
-    model_str = ""
-    for line in model_lines:
-        model_str = model_str + line
-
-    return (model_str, time_lines)
-
-def parseTimeLines(timeLines):
-    layer_map = {}
-    for line in timeLines:
-        trim_line = line.strip()
-        items = trim_line.split("\t")
-        layer_items = items[0].split(" ")
-        layer_name = layer_items[-1]
-        time_items = items[1].split(" ")
-        if layer_name not in layer_map.keys():
-            layer_map[layer_name] = (float)(time_items[1])
-        else:
-            layer_map[layer_name] = layer_map[layer_name] + (float)(time_items[1])
-
-    return layer_map
-
-def parseModelStr(modelStr):
-    net = caffe_pb2.NetParameter()
-    txtf.Merge(modelStr, net)
-    layer_model_map = {}
-    global_engine = "CAFFE"
-    if net.engine != "":
-        global_engine = net.engine
-    for index in range(0, len(net.layer)):
-        engine = global_engine
-        l = net.layer[index]
-        if l.engine != "":
-            engine = l.engine
-        param_engine = -1
-        if l.type == "Convolution" or l.type == "Deconvolution":
-            if l.convolution_param.engine != "":
-                param_engine = l.convolution_param.engine
-        elif l.type == "BatchNorm":
-            if l.batch_norm_param.engine != "":
-                param_engine = l.batch_norm_param.engine
-        elif l.type == "Concat":
-            if l.concat_param.engine != "":
-                param_engine = l.concat_param.engine
-        elif l.type == "Eltwise":
-            if l.eltwise_param.engine != "":
-                param_engine = l.eltwise_param.engine
-        elif l.type == "InnerProduct":
-            if l.inner_product_param.engine != "":
-                param_engine = l.inner_product_param.engine
-        elif l.type == "LRN":
-            if l.lrn_param.engine != "":
-                param_engine = l.lrn_param.engine
-        elif l.type == "Pooling":
-            if l.pooling_param.engine != "":
-                param_engine = l.pooling_param.engine
-        elif l.type == "ReLU":
-            if l.relu_param.engine != "":
-                param_engine = l.relu_param.engine
-
-        if param_engine == 0 or param_engine == 1:
-            engine = "CAFFE"
-        elif param_engine == 3:
-            engine = "MKL2017"
-        elif param_engine == 4:
-            engine = "MKLDNN"
-        layer_model_map[l.name] = (index, engine, l)
-
-    return (net, layer_model_map)
+import utils
 
 def selectOptimalEngine(layers):
     optimal_layer = None
@@ -140,9 +29,9 @@ def tuneEngine(logs, model):
     net = None
     for log in logs:
         log_name = os.path.basename(log)
-        (model_str, time_lines) = parseLog(log)
-        (net, layer_model_map) = parseModelStr(model_str)
-        layer_time_map = parseTimeLines(time_lines)
+        (model_str, time_lines) = utils.parseLog(log)
+        (net, layer_model_map) = utils.parseModelStr(model_str)
+        layer_time_map = utils.parseTimeLines(time_lines)
         for k, v in layer_model_map.items():
             if k not in layer_map.keys():
                 layer_map[k] = [(v[0], v[1], layer_time_map[k], v[2])]
@@ -187,4 +76,8 @@ def genModel(net, model, optimal_layer_map):
     parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0')
 
     params = parser.parse_args()
+    if params.output == "":
+        print "Please specify the output for tuned model with -o"
+        sys.exit(1)
+
     tuneEngine(params.logs, params.output)
diff --git a/examples/pycaffe/tune_model.py b/examples/pycaffe/tune_model.py
@@ -1,92 +1,88 @@
 import os
-import datetime
-import copy
+import sys
 import argparse
-
 from caffe.proto import caffe_pb2
 import google.protobuf.text_format as txtf
-import caffe
-
-def isWinogradApplicable(ic, oc, stride, kernel_size):
-    if ic % 16 != 0:
-        return False
-    if oc % 16 != 0:
-        return False
-    if stride != 1:
-        return False
-    if kernel_size != 3:
-        return False
-
-    return True
-
-def genHybridModel(net, winogradLayers, modelName):
-    newNet = copy.deepcopy(net)
-    newNetName = modelName.split(".")[0] + "_hybrid.prototxt"
-    for layer in winogradLayers:
-        newNet.layer[layer].convolution_param.conv_algorithm = "winograd"
-    with open(newNetName, 'w') as f:
-       f.write(str(newNet))
-       print "[INFO] Complete model tuning with Winograd:", newNetName
-
-def tuneModelDefinition(model):
-    net = caffe_pb2.NetParameter()
-    with open(model) as f:
-        s = f.read()
-        txtf.Merge(s, net)
+import copy
+import utils
 
-    net.name = 'Tuned model of ' + net.name
-    output_layer_map = {} 
+def genOptimalModel(net, mkldnn_direct_time_map, mkldnn_winograd_time_map, optimal_model):
     for index in range(0, len(net.layer)):
         l = net.layer[index]
-        if l.type == ("Convolution"):
-            stride = 0
-            kernel_size = 0
-            if len(l.convolution_param.stride) == 0:
-                stride = 1
+        if l.type == "Convolution":
+            if mkldnn_winograd_time_map[l.name] < mkldnn_direct_time_map[l.name]:
+                l.convolution_param.conv_algorithm = "winograd"
             else:
-                stride = l.convolution_param.stride[0]
-            kernel_size = l.convolution_param.kernel_size[0]
-            ic = 0
-            if l.bottom[0] in output_layer_map.keys():
-                ic = output_layer_map[l.bottom[0]][4]
-            oc = l.convolution_param.num_output
-            output_layer_map[l.name] = (index, stride, kernel_size, ic, oc, True)
-        elif l.type == ("InnerProduct"):
-            oc = l.inner_product_param.num_output
-            ic = 0
-            if l.bottom[0] in output_layer_map.keys():
-                ic = output_layer_map[l.bottom[0]][4]
-            output_layer_map[l.name] = (index, 0, 0, ic, oc, False)
-        elif l.type.endswith("Data") or l.type.endswith("Input"):
-            # TODO: correct the output
-            #    dynamic_net = caffe.Net(model, caffe.TEST)
-            #    for k, v in dynamic_net.blobs.items():
-            #        dynamic_net_map[k] = v.data.shape
-            ic = oc = 3
-            output_layer_map[l.name] = (index, 0, 0, ic, oc, False)
-        else:
-            ic = 0
-            if l.bottom[0] in output_layer_map.keys():
-                ic = output_layer_map[l.bottom[0]][4]
-            oc = ic
-            output_layer_map[l.name] = (index, 0, 0, ic, oc, False)
-
-    winograd_convolutions = []
-    for k,v in output_layer_map.items():
-        if v[5] and isWinogradApplicable(v[3], v[4], v[1], v[2]):
-            winograd_convolutions.append(v[0])
-
-    if len(winograd_convolutions) > 0:
-        genHybridModel(net, winograd_convolutions, model)
-    else:
-        print "[INFO] No need to tune model with Winograd:", model
-
+                l.convolution_param.conv_algorithm = "direct"
+
+    with open(optimal_model, "w") as f:
+        f.write(txtf.MessageToString(net, float_format=".17g"))
+
+def tuneModelDefinition(model_path, iteration):
+    working_dir = sys.path[0]
+    caffe_path = os.path.join(working_dir, "..", "..", "build", "tools", "caffe")
+    if not os.path.exists(caffe_path):
+        print "Caffe binary does not exist; please build Caffe binary first."
+        sys,exit(1)
+
+    base_model_name = os.path.basename(model_path)
+    model_dir = os.path.dirname(model_path)
+    winograd_model_name = base_model_name.split(".")[0] + "_winograd.prototxt"
+    winograd_model_path = os.path.join(model_dir, winograd_model_name)
+    direct_model_name = base_model_name.split(".")[0] + "_direct.prototxt"
+    direct_model_path = os.path.join(model_dir, direct_model_name)
+
+    base_net = caffe_pb2.NetParameter()
+    with open(model_path) as f:
+        s = f.read()
+        txtf.Merge(s, base_net)
+
+    direct_net = copy.deepcopy(base_net)
+    for index in range(0, len(direct_net.layer)):
+        l = direct_net.layer[index]
+        if l.type == "Convolution":
+            l.convolution_param.conv_algorithm = "direct"
+
+    with open(direct_model_path, "w") as f:
+        f.write(txtf.MessageToString(direct_net, float_format=".17g"))
+
+    winograd_net = copy.deepcopy(base_net)
+    for index in range(0, len(winograd_net.layer)):
+        l = winograd_net.layer[index]
+        if l.type == "Convolution":
+            l.convolution_param.conv_algorithm = "winograd"
+
+    with open(winograd_model_path, "w") as f:
+        f.write(txtf.MessageToString(winograd_net, float_format=".17g"))
+
+    mkldnn_direct_log = "mkldnn_direct.log"
+    mkldnn_winograd_log = "mkldnn_winograd.log"
+    mkldnn_direct_log_path = os.path.join(model_dir, mkldnn_direct_log)
+    mkldnn_winograd_log_path = os.path.join(model_dir, mkldnn_winograd_log)
+
+    mkldnn_direct_command = caffe_path + " time -model " + direct_model_path + " -engine MKLDNN -iterations " + str(iteration) + " >& " + mkldnn_direct_log_path
+    os.system(mkldnn_direct_command)
+    mkldnn_winograd_command = caffe_path + " time -model " + winograd_model_path + " -engine MKLDNN -iterations " + str(iteration) + " >& " + mkldnn_winograd_log_path
+    os.system(mkldnn_winograd_command)
+
+    (model_str, mkldnn_direct_time_lines) = utils.parseLog(mkldnn_direct_log_path)
+    mkldnn_direct_layer_time_map = utils.parseTimeLines(mkldnn_direct_time_lines)
+    (model_str, mkldnn_winograd_time_lines) = utils.parseLog(mkldnn_winograd_log_path)
+    mkldnn_winograd_layer_time_map = utils.parseTimeLines(mkldnn_winograd_time_lines)
+
+    hybrid_model_name = base_model_name.split(".")[0] + "_hybrid.prototxt"
+    hybrid_model_path = os.path.join(model_dir, hybrid_model_name)
+    genOptimalModel(base_net, mkldnn_direct_layer_time_map, mkldnn_winograd_layer_time_map, hybrid_model_path)
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
 
     parser.add_argument('-m', '--model', action='store', dest='model', default="",
                         help='require the model definition (prototxt)')
 
+    parser.add_argument('-i', '--iteration', action='store', dest='iterations', type=int, default=10,
+                        help='require iterations number to run the model')
+
     parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0')
 
     params = parser.parse_args()
@@ -96,4 +92,4 @@ def tuneModelDefinition(model):
         print "[ERROR] Please specify the model definition file with -m"
         exit(1)
 
-    tuneModelDefinition(model)
+    tuneModelDefinition(params.model, params.iterations)