From 65fe465bb7620f8fc47f0c5283ea3f2de7296d0f Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Thu, 30 Nov 2023 23:09:13 +0800 Subject: [PATCH 01/15] add files: depoly_adreno_tvmc.md, depoly_adreno.md --- .../deploy/deploy_models/10-depoly_adreno.md | 0 .../deploy_models/11-depoly_adreno_tvmc.md | 512 ++++++++++++++++++ 2 files changed, 512 insertions(+) create mode 100644 docs/how_to/deploy/deploy_models/10-depoly_adreno.md create mode 100644 docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md new file mode 100644 index 00000000..e69de29b diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md new file mode 100644 index 00000000..af9de6f1 --- /dev/null +++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md @@ -0,0 +1,512 @@ +--- +title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型 +--- + + +# 使用 tvmc 接口在 Adreno™ 上部署预训练模型 + +:::note +单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_model_on_adreno_tvmc.html#sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-tvmc-py) 下载完整的示例代码 +::: + +**作者**: Siva Rama Krishna + +本文是一篇关于在 Adreno™ 上部署预训练 Keras resnet50 模型的逐步教程。 + +此外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它并设置 RPC 环境。 + +[在 Adreno GPU 上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html) + +```python +import os +import tvm +import numpy as np +from tvm import relay +from tvm.driver import tvmc +from tvm.driver.tvmc.model import TVMCPackage +from tvm.contrib import utils +``` + +# 配置 +在编译之前指定 Adreno 目标,以生成利用纹理的内核并获得纹理带来的全部优势。注意:此生成的示例在我们的 x86 服务器上运行以进行演示。如果在 Android 设备上运行它,我们需要指定其指令集。如果要在实际设备上通过 rpc 运行此教程,请将 `local_demo` 设置为 False。 + +```python +local_demo = True + +# 默认情况下,将在 CPU 目标上执行。 +# 选择 'llvm'、'opencl' 或 'opencl -device=adreno' +target = "llvm" + +# 更改目标配置。 +# 运行 `adb shell cat /proc/cpuinfo` 以查找架构。 +arch = "arm64" 
+target_host = "llvm -mtriple=%s-linux-android" % arch + +# 自动调优是计算密集型且耗时的任务,因此默认情况下禁用。 +# 如果需要,请启用它。 +is_tuning = False +tune_log = "adreno-resnet50.log" + +# 启用 OpenCLML 加速运算符库。 +enable_clml = False +cross_compiler = ( + os.getenv("ANDROID_NDK_HOME", "") + + "/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang" +) +``` + +# 制作 Keras Resnet50 模型 +```python +from tensorflow.keras.applications.resnet50 import ResNet50 + +tmp_path = utils.tempdir() +model_file_name = tmp_path.relpath("resnet50.h5") + +model = ResNet50(include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000) +model.save(model_file_name) +``` + +Out: +```info +Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5 + + 8192/102967424 [..............................] - ETA: 0s + 7208960/102967424 [=>............................] - ETA: 0s + 8380416/102967424 [=>............................] - ETA: 1s + 16769024/102967424 [===>..........................] - ETA: 1s + 23412736/102967424 [=====>........................] - ETA: 1s + 25157632/102967424 [======>.......................] - ETA: 1s + 33546240/102967424 [========>.....................] - ETA: 1s + 40189952/102967424 [==========>...................] - ETA: 1s + 41934848/102967424 [===========>..................] - ETA: 1s + 50143232/102967424 [=============>................] - ETA: 1s + 50323456/102967424 [=============>................] - ETA: 1s + 56967168/102967424 [===============>..............] - ETA: 1s + 58712064/102967424 [================>.............] - ETA: 1s + 65355776/102967424 [==================>...........] - ETA: 0s + 67100672/102967424 [==================>...........] - ETA: 0s + 69296128/102967424 [===================>..........] - ETA: 0s + 71540736/102967424 [===================>..........] - ETA: 0s + 73269248/102967424 [====================>.........] 
- ETA: 0s + 75489280/102967424 [====================>.........] - ETA: 0s + 83877888/102967424 [=======================>......] - ETA: 0s + 90521600/102967424 [=========================>....] - ETA: 0s + 92266496/102967424 [=========================>....] - ETA: 0s + 99598336/102967424 [============================>.] - ETA: 0s +100646912/102967424 [============================>.] - ETA: 0s +102850560/102967424 [============================>.] - ETA: 0s +102967424/102967424 [==============================] - 3s 0us/step +``` + +# 加载模型 +将模型从任何框架转换为 tvm relay 模块。tvmc.load 支持来自任何框架的模型(例如 tensorflow saved_model、onnx、tflite 等),并自动检测文件类型。 +```python +tvmc_model = tvmc.load(model_file_name) + +print(tvmc_model.mod) + + +# tvmc_model 包含 tvmc_model.mod(即 relay 模块)和 tvmc_model.params(即模块的参数)。 +``` + +Out: + +```python +def @main(%input_2: Tensor[(1, 224, 224, 3), float32], %v_param_1: Tensor[(7, 7, 3, 64), float32], %v_param_2: Tensor[(64), float32], %v_param_3: Tensor[(64), float32], %v_param_4: Tensor[(64), float32], %v_param_5: Tensor[(64), float32], %v_param_6: Tensor[(64), float32], %v_param_19: Tensor[(1, 1, 64, 256), float32], %v_param_20: Tensor[(256), float32], %v_param_23: Tensor[(256), float32], %v_param_24: Tensor[(256), float32], %v_param_25: Tensor[(256), float32], %v_param_26: Tensor[(256), float32], %v_param_7: Tensor[(1, 1, 64, 64), float32], %v_param_8: Tensor[(64), float32], %v_param_9: Tensor[(64), float32], %v_param_10: Tensor[(64), float32], %v_param_11: Tensor[(64), float32], %v_param_12: Tensor[(64), float32], %v_param_13: Tensor[(3, 3, 64, 64), float32], %v_param_14: Tensor[(64), float32], %v_param_15: Tensor[(64), float32], %v_param_16: Tensor[(64), float32], %v_param_17: Tensor[(64), float32], %v_param_18: Tensor[(64), float32], %v_param_21: Tensor[(1, 1, 64, 256), float32], %v_param_22: Tensor[(256), float32], %v_param_27: Tensor[(256), float32], %v_param_28: Tensor[(256), float32], %v_param_29: Tensor[(256), float32], %v_param_30: Tensor[(256), 
float32], %v_param_31: Tensor[(1, 1, 256, 64), float32], %v_param_32: Tensor[(64), float32], %v_param_33: Tensor[(64), float32], %v_param_34: Tensor[(64), float32], %v_param_35: Tensor[(64), float32], %v_param_36: Tensor[(64), float32], %v_param_37: Tensor[(3, 3, 64, 64), float32], %v_param_38: Tensor[(64), float32], %v_param_39: Tensor[(64), float32], %v_param_40: Tensor[(64), float32], %v_param_41: Tensor[(64), float32], %v_param_42: Tensor[(64), float32], %v_param_43: Tensor[(1, 1, 64, 256), float32], %v_param_44: Tensor[(256), float32], %v_param_45: Tensor[(256), float32], %v_param_46: Tensor[(256), float32], %v_param_47: Tensor[(256), float32], %v_param_48: Tensor[(256), float32], %v_param_49: Tensor[(1, 1, 256, 64), float32], %v_param_50: Tensor[(64), float32], %v_param_51: Tensor[(64), float32], %v_param_52: Tensor[(64), float32], %v_param_53: Tensor[(64), float32], %v_param_54: Tensor[(64), float32], %v_param_55: Tensor[(3, 3, 64, 64), float32], %v_param_56: Tensor[(64), float32], %v_param_57: Tensor[(64), float32], %v_param_58: Tensor[(64), float32], %v_param_59: Tensor[(64), float32], %v_param_60: Tensor[(64), float32], %v_param_61: Tensor[(1, 1, 64, 256), float32], %v_param_62: Tensor[(256), float32], %v_param_63: Tensor[(256), float32], %v_param_64: Tensor[(256), float32], %v_param_65: Tensor[(256), float32], %v_param_66: Tensor[(256), float32], %v_param_79: Tensor[(1, 1, 256, 512), float32], %v_param_80: Tensor[(512), float32], %v_param_83: Tensor[(512), float32], %v_param_84: Tensor[(512), float32], %v_param_85: Tensor[(512), float32], %v_param_86: Tensor[(512), float32], %v_param_67: Tensor[(1, 1, 256, 128), float32], %v_param_68: Tensor[(128), float32], %v_param_69: Tensor[(128), float32], %v_param_70: Tensor[(128), float32], %v_param_71: Tensor[(128), float32], %v_param_72: Tensor[(128), float32], %v_param_73: Tensor[(3, 3, 128, 128), float32], %v_param_74: Tensor[(128), float32], %v_param_75: Tensor[(128), float32], %v_param_76: Tensor[(128), 
float32], %v_param_77: Tensor[(128), float32], %v_param_78: Tensor[(128), float32], %v_param_81: Tensor[(1, 1, 128, 512), float32], %v_param_82: Tensor[(512), float32], %v_param_87: Tensor[(512), float32], %v_param_88: Tensor[(512), float32], %v_param_89: Tensor[(512), float32], %v_param_90: Tensor[(512), float32], %v_param_91: Tensor[(1, 1, 512, 128), float32], %v_param_92: Tensor[(128), float32], %v_param_93: Tensor[(128), float32], %v_param_94: Tensor[(128), float32], %v_param_95: Tensor[(128), float32], %v_param_96: Tensor[(128), float32], %v_param_97: Tensor[(3, 3, 128, 128), float32], %v_param_98: Tensor[(128), float32], %v_param_99: Tensor[(128), float32], %v_param_100: Tensor[(128), float32], %v_param_101: Tensor[(128), float32], %v_param_102: Tensor[(128), float32], %v_param_103: Tensor[(1, 1, 128, 512), float32], %v_param_104: Tensor[(512), float32], %v_param_105: Tensor[(512), float32], %v_param_106: Tensor[(512), float32], %v_param_107: Tensor[(512), float32], %v_param_108: Tensor[(512), float32], %v_param_109: Tensor[(1, 1, 512, 128), float32], %v_param_110: Tensor[(128), float32], %v_param_111: Tensor[(128), float32], %v_param_112: Tensor[(128), float32], %v_param_113: Tensor[(128), float32], %v_param_114: Tensor[(128), float32], %v_param_115: Tensor[(3, 3, 128, 128), float32], %v_param_116: Tensor[(128), float32], %v_param_117: Tensor[(128), float32], %v_param_118: Tensor[(128), float32], %v_param_119: Tensor[(128), float32], %v_param_120: Tensor[(128), float32], %v_param_121: Tensor[(1, 1, 128, 512), float32], %v_param_122: Tensor[(512), float32], %v_param_123: Tensor[(512), float32], %v_param_124: Tensor[(512), float32], %v_param_125: Tensor[(512), float32], %v_param_126: Tensor[(512), float32], %v_param_127: Tensor[(1, 1, 512, 128), float32], %v_param_128: Tensor[(128), float32], %v_param_129: Tensor[(128), float32], %v_param_130: Tensor[(128), float32], %v_param_131: Tensor[(128), float32], %v_param_132: Tensor[(128), float32], %v_param_133: 
Tensor[(3, 3, 128, 128), float32], %v_param_134: Tensor[(128), float32], %v_param_135: Tensor[(128), float32], %v_param_136: Tensor[(128), float32], %v_param_137: Tensor[(128), float32], %v_param_138: Tensor[(128), float32], %v_param_139: Tensor[(1, 1, 128, 512), float32], %v_param_140: Tensor[(512), float32], %v_param_141: Tensor[(512), float32], %v_param_142: Tensor[(512), float32], %v_param_143: Tensor[(512), float32], %v_param_144: Tensor[(512), float32], %v_param_157: Tensor[(1, 1, 512, 1024), float32], %v_param_158: Tensor[(1024), float32], %v_param_161: Tensor[(1024), float32], %v_param_162: Tensor[(1024), float32], %v_param_163: Tensor[(1024), float32], %v_param_164: Tensor[(1024), float32], %v_param_145: Tensor[(1, 1, 512, 256), float32], %v_param_146: Tensor[(256), float32], %v_param_147: Tensor[(256), float32], %v_param_148: Tensor[(256), float32], %v_param_149: Tensor[(256), float32], %v_param_150: Tensor[(256), float32], %v_param_151: Tensor[(3, 3, 256, 256), float32], %v_param_152: Tensor[(256), float32], %v_param_153: Tensor[(256), float32], %v_param_154: Tensor[(256), float32], %v_param_155: Tensor[(256), float32], %v_param_156: Tensor[(256), float32], %v_param_159: Tensor[(1, 1, 256, 1024), float32], %v_param_160: Tensor[(1024), float32], %v_param_165: Tensor[(1024), float32], %v_param_166: Tensor[(1024), float32], %v_param_167: Tensor[(1024), float32], %v_param_168: Tensor[(1024), float32], %v_param_169: Tensor[(1, 1, 1024, 256), float32], %v_param_170: Tensor[(256), float32], %v_param_171: Tensor[(256), float32], %v_param_172: Tensor[(256), float32], %v_param_173: Tensor[(256), float32], %v_param_174: Tensor[(256), float32], %v_param_175: Tensor[(3, 3, 256, 256), float32], %v_param_176: Tensor[(256), float32], %v_param_177: Tensor[(256), float32], %v_param_178: Tensor[(256), float32], %v_param_179: Tensor[(256), float32], %v_param_180: Tensor[(256), float32], %v_param_181: Tensor[(1, 1, 256, 1024), float32], %v_param_182: Tensor[(1024), float32], 
%v_param_183: Tensor[(1024), float32], %v_param_184: Tensor[(1024), float32], %v_param_185: Tensor[(1024), float32], %v_param_186: Tensor[(1024), float32], %v_param_187: Tensor[(1, 1, 1024, 256), float32], %v_param_188: Tensor[(256), float32], %v_param_189: Tensor[(256), float32], %v_param_190: Tensor[(256), float32], %v_param_191: Tensor[(256), float32], %v_param_192: Tensor[(256), float32], %v_param_193: Tensor[(3, 3, 256, 256), float32], %v_param_194: Tensor[(256), float32], %v_param_195: Tensor[(256), float32], %v_param_196: Tensor[(256), float32], %v_param_197: Tensor[(256), float32], %v_param_198: Tensor[(256), float32], %v_param_199: Tensor[(1, 1, 256, 1024), float32], %v_param_200: Tensor[(1024), float32], %v_param_201: Tensor[(1024), float32], %v_param_202: Tensor[(1024), float32], %v_param_203: Tensor[(1024), float32], %v_param_204: Tensor[(1024), float32], %v_param_205: Tensor[(1, 1, 1024, 256), float32], %v_param_206: Tensor[(256), float32], %v_param_207: Tensor[(256), float32], %v_param_208: Tensor[(256), float32], %v_param_209: Tensor[(256), float32], %v_param_210: Tensor[(256), float32], %v_param_211: Tensor[(3, 3, 256, 256), float32], %v_param_212: Tensor[(256), float32], %v_param_213: Tensor[(256), float32], %v_param_214: Tensor[(256), float32], %v_param_215: Tensor[(256), float32], %v_param_216: Tensor[(256), float32], %v_param_217: Tensor[(1, 1, 256, 1024), float32], %v_param_218: Tensor[(1024), float32], %v_param_219: Tensor[(1024), float32], %v_param_220: Tensor[(1024), float32], %v_param_221: Tensor[(1024), float32], %v_param_222: Tensor[(1024), float32], %v_param_223: Tensor[(1, 1, 1024, 256), float32], %v_param_224: Tensor[(256), float32], %v_param_225: Tensor[(256), float32], %v_param_226: Tensor[(256), float32], %v_param_227: Tensor[(256), float32], %v_param_228: Tensor[(256), float32], %v_param_229: Tensor[(3, 3, 256, 256), float32], %v_param_230: Tensor[(256), float32], %v_param_231: Tensor[(256), float32], %v_param_232: Tensor[(256), 
float32], %v_param_233: Tensor[(256), float32], %v_param_234: Tensor[(256), float32], %v_param_235: Tensor[(1, 1, 256, 1024), float32], %v_param_236: Tensor[(1024), float32], %v_param_237: Tensor[(1024), float32], %v_param_238: Tensor[(1024), float32], %v_param_239: Tensor[(1024), float32], %v_param_240: Tensor[(1024), float32], %v_param_241: Tensor[(1, 1, 1024, 256), float32], %v_param_242: Tensor[(256), float32], %v_param_243: Tensor[(256), float32], %v_param_244: Tensor[(256), float32], %v_param_245: Tensor[(256), float32], %v_param_246: Tensor[(256), float32], %v_param_247: Tensor[(3, 3, 256, 256), float32], %v_param_248: Tensor[(256), float32], %v_param_249: Tensor[(256), float32], %v_param_250: Tensor[(256), float32], %v_param_251: Tensor[(256), float32], %v_param_252: Tensor[(256), float32], %v_param_253: Tensor[(1, 1, 256, 1024), float32], %v_param_254: Tensor[(1024), float32], %v_param_255: Tensor[(1024), float32], %v_param_256: Tensor[(1024), float32], %v_param_257: Tensor[(1024), float32], %v_param_258: Tensor[(1024), float32], %v_param_271: Tensor[(1, 1, 1024, 2048), float32], %v_param_272: Tensor[(2048), float32], %v_param_275: Tensor[(2048), float32], %v_param_276: Tensor[(2048), float32], %v_param_277: Tensor[(2048), float32], %v_param_278: Tensor[(2048), float32], %v_param_259: Tensor[(1, 1, 1024, 512), float32], %v_param_260: Tensor[(512), float32], %v_param_261: Tensor[(512), float32], %v_param_262: Tensor[(512), float32], %v_param_263: Tensor[(512), float32], %v_param_264: Tensor[(512), float32], %v_param_265: Tensor[(3, 3, 512, 512), float32], %v_param_266: Tensor[(512), float32], %v_param_267: Tensor[(512), float32], %v_param_268: Tensor[(512), float32], %v_param_269: Tensor[(512), float32], %v_param_270: Tensor[(512), float32], %v_param_273: Tensor[(1, 1, 512, 2048), float32], %v_param_274: Tensor[(2048), float32], %v_param_279: Tensor[(2048), float32], %v_param_280: Tensor[(2048), float32], %v_param_281: Tensor[(2048), float32], %v_param_282: 
Tensor[(2048), float32], %v_param_283: Tensor[(1, 1, 2048, 512), float32], %v_param_284: Tensor[(512), float32], %v_param_285: Tensor[(512), float32], %v_param_286: Tensor[(512), float32], %v_param_287: Tensor[(512), float32], %v_param_288: Tensor[(512), float32], %v_param_289: Tensor[(3, 3, 512, 512), float32], %v_param_290: Tensor[(512), float32], %v_param_291: Tensor[(512), float32], %v_param_292: Tensor[(512), float32], %v_param_293: Tensor[(512), float32], %v_param_294: Tensor[(512), float32], %v_param_295: Tensor[(1, 1, 512, 2048), float32], %v_param_296: Tensor[(2048), float32], %v_param_297: Tensor[(2048), float32], %v_param_298: Tensor[(2048), float32], %v_param_299: Tensor[(2048), float32], %v_param_300: Tensor[(2048), float32], %v_param_301: Tensor[(1, 1, 2048, 512), float32], %v_param_302: Tensor[(512), float32], %v_param_303: Tensor[(512), float32], %v_param_304: Tensor[(512), float32], %v_param_305: Tensor[(512), float32], %v_param_306: Tensor[(512), float32], %v_param_307: Tensor[(3, 3, 512, 512), float32], %v_param_308: Tensor[(512), float32], %v_param_309: Tensor[(512), float32], %v_param_310: Tensor[(512), float32], %v_param_311: Tensor[(512), float32], %v_param_312: Tensor[(512), float32], %v_param_313: Tensor[(1, 1, 512, 2048), float32], %v_param_314: Tensor[(2048), float32], %v_param_315: Tensor[(2048), float32], %v_param_316: Tensor[(2048), float32], %v_param_317: Tensor[(2048), float32], %v_param_318: Tensor[(2048), float32], %v_param_319: Tensor[(1000, 2048), float32], %v_param_320: Tensor[(1000), float32]) { + %0 = nn.pad(%input_2, 0, pad_width=[[0, 0], [3, 3], [3, 3], [0, 0]]); + %1 = nn.conv2d(%0, %v_param_1, strides=[2, 2], padding=[0, 0, 0, 0], channels=64, kernel_size=[7, 7], data_layout="NHWC", kernel_layout="HWIO"); + %2 = nn.bias_add(%1, %v_param_2, axis=-1); + %3 = nn.batch_norm(%2, %v_param_3, %v_param_4, %v_param_5, %v_param_6, axis=3, epsilon=1.001e-05f); + %4 = %3.0; + %5 = nn.relu(%4); + %6 = nn.pad(%5, 0, pad_width=[[0, 0], 
[1, 1], [1, 1], [0, 0]]); + %7 = nn.max_pool2d(%6, pool_size=[3, 3], strides=[2, 2], padding=[0, 0, 0, 0], layout="NHWC"); + %8 = nn.conv2d(%7, %v_param_19, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %9 = nn.bias_add(%8, %v_param_20, axis=-1); + %10 = nn.batch_norm(%9, %v_param_23, %v_param_24, %v_param_25, %v_param_26, axis=3, epsilon=1.001e-05f); + %11 = nn.conv2d(%7, %v_param_7, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %12 = nn.bias_add(%11, %v_param_8, axis=-1); + %13 = nn.batch_norm(%12, %v_param_9, %v_param_10, %v_param_11, %v_param_12, axis=3, epsilon=1.001e-05f); + %14 = %13.0; + %15 = nn.relu(%14); + %16 = nn.conv2d(%15, %v_param_13, padding=[1i64, 1i64, 1i64, 1i64], channels=64, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %17 = nn.bias_add(%16, %v_param_14, axis=-1); + %18 = nn.batch_norm(%17, %v_param_15, %v_param_16, %v_param_17, %v_param_18, axis=3, epsilon=1.001e-05f); + %19 = %18.0; + %20 = nn.relu(%19); + %21 = nn.conv2d(%20, %v_param_21, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %22 = nn.bias_add(%21, %v_param_22, axis=-1); + %23 = nn.batch_norm(%22, %v_param_27, %v_param_28, %v_param_29, %v_param_30, axis=3, epsilon=1.001e-05f); + %24 = %10.0; + %25 = %23.0; + %26 = add(%24, %25); + %27 = nn.relu(%26); + %28 = nn.conv2d(%27, %v_param_31, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %29 = nn.bias_add(%28, %v_param_32, axis=-1); + %30 = nn.batch_norm(%29, %v_param_33, %v_param_34, %v_param_35, %v_param_36, axis=3, epsilon=1.001e-05f); + %31 = %30.0; + %32 = nn.relu(%31); + %33 = nn.conv2d(%32, %v_param_37, padding=[1i64, 1i64, 1i64, 1i64], channels=64, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %34 = nn.bias_add(%33, %v_param_38, axis=-1); + %35 = nn.batch_norm(%34, %v_param_39, 
%v_param_40, %v_param_41, %v_param_42, axis=3, epsilon=1.001e-05f); + %36 = %35.0; + %37 = nn.relu(%36); + %38 = nn.conv2d(%37, %v_param_43, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %39 = nn.bias_add(%38, %v_param_44, axis=-1); + %40 = nn.batch_norm(%39, %v_param_45, %v_param_46, %v_param_47, %v_param_48, axis=3, epsilon=1.001e-05f); + %41 = %40.0; + %42 = add(%27, %41); + %43 = nn.relu(%42); + %44 = nn.conv2d(%43, %v_param_49, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %45 = nn.bias_add(%44, %v_param_50, axis=-1); + %46 = nn.batch_norm(%45, %v_param_51, %v_param_52, %v_param_53, %v_param_54, axis=3, epsilon=1.001e-05f); + %47 = %46.0; + %48 = nn.relu(%47); + %49 = nn.conv2d(%48, %v_param_55, padding=[1i64, 1i64, 1i64, 1i64], channels=64, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %50 = nn.bias_add(%49, %v_param_56, axis=-1); + %51 = nn.batch_norm(%50, %v_param_57, %v_param_58, %v_param_59, %v_param_60, axis=3, epsilon=1.001e-05f); + %52 = %51.0; + %53 = nn.relu(%52); + %54 = nn.conv2d(%53, %v_param_61, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %55 = nn.bias_add(%54, %v_param_62, axis=-1); + %56 = nn.batch_norm(%55, %v_param_63, %v_param_64, %v_param_65, %v_param_66, axis=3, epsilon=1.001e-05f); + %57 = %56.0; + %58 = add(%43, %57); + %59 = nn.relu(%58); + %60 = nn.conv2d(%59, %v_param_79, strides=[2, 2], padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %61 = nn.bias_add(%60, %v_param_80, axis=-1); + %62 = nn.batch_norm(%61, %v_param_83, %v_param_84, %v_param_85, %v_param_86, axis=3, epsilon=1.001e-05f); + %63 = nn.conv2d(%59, %v_param_67, strides=[2, 2], padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %64 = nn.bias_add(%63, %v_param_68, axis=-1); + %65 = 
nn.batch_norm(%64, %v_param_69, %v_param_70, %v_param_71, %v_param_72, axis=3, epsilon=1.001e-05f); + %66 = %65.0; + %67 = nn.relu(%66); + %68 = nn.conv2d(%67, %v_param_73, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %69 = nn.bias_add(%68, %v_param_74, axis=-1); + %70 = nn.batch_norm(%69, %v_param_75, %v_param_76, %v_param_77, %v_param_78, axis=3, epsilon=1.001e-05f); + %71 = %70.0; + %72 = nn.relu(%71); + %73 = nn.conv2d(%72, %v_param_81, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %74 = nn.bias_add(%73, %v_param_82, axis=-1); + %75 = nn.batch_norm(%74, %v_param_87, %v_param_88, %v_param_89, %v_param_90, axis=3, epsilon=1.001e-05f); + %76 = %62.0; + %77 = %75.0; + %78 = add(%76, %77); + %79 = nn.relu(%78); + %80 = nn.conv2d(%79, %v_param_91, padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %81 = nn.bias_add(%80, %v_param_92, axis=-1); + %82 = nn.batch_norm(%81, %v_param_93, %v_param_94, %v_param_95, %v_param_96, axis=3, epsilon=1.001e-05f); + %83 = %82.0; + %84 = nn.relu(%83); + %85 = nn.conv2d(%84, %v_param_97, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %86 = nn.bias_add(%85, %v_param_98, axis=-1); + %87 = nn.batch_norm(%86, %v_param_99, %v_param_100, %v_param_101, %v_param_102, axis=3, epsilon=1.001e-05f); + %88 = %87.0; + %89 = nn.relu(%88); + %90 = nn.conv2d(%89, %v_param_103, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %91 = nn.bias_add(%90, %v_param_104, axis=-1); + %92 = nn.batch_norm(%91, %v_param_105, %v_param_106, %v_param_107, %v_param_108, axis=3, epsilon=1.001e-05f); + %93 = %92.0; + %94 = add(%79, %93); + %95 = nn.relu(%94); + %96 = nn.conv2d(%95, %v_param_109, padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", 
kernel_layout="HWIO"); + %97 = nn.bias_add(%96, %v_param_110, axis=-1); + %98 = nn.batch_norm(%97, %v_param_111, %v_param_112, %v_param_113, %v_param_114, axis=3, epsilon=1.001e-05f); + %99 = %98.0; + %100 = nn.relu(%99); + %101 = nn.conv2d(%100, %v_param_115, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %102 = nn.bias_add(%101, %v_param_116, axis=-1); + %103 = nn.batch_norm(%102, %v_param_117, %v_param_118, %v_param_119, %v_param_120, axis=3, epsilon=1.001e-05f); + %104 = %103.0; + %105 = nn.relu(%104); + %106 = nn.conv2d(%105, %v_param_121, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %107 = nn.bias_add(%106, %v_param_122, axis=-1); + %108 = nn.batch_norm(%107, %v_param_123, %v_param_124, %v_param_125, %v_param_126, axis=3, epsilon=1.001e-05f); + %109 = %108.0; + %110 = add(%95, %109); + %111 = nn.relu(%110); + %112 = nn.conv2d(%111, %v_param_127, padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %113 = nn.bias_add(%112, %v_param_128, axis=-1); + %114 = nn.batch_norm(%113, %v_param_129, %v_param_130, %v_param_131, %v_param_132, axis=3, epsilon=1.001e-05f); + %115 = %114.0; + %116 = nn.relu(%115); + %117 = nn.conv2d(%116, %v_param_133, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %118 = nn.bias_add(%117, %v_param_134, axis=-1); + %119 = nn.batch_norm(%118, %v_param_135, %v_param_136, %v_param_137, %v_param_138, axis=3, epsilon=1.001e-05f); + %120 = %119.0; + %121 = nn.relu(%120); + %122 = nn.conv2d(%121, %v_param_139, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %123 = nn.bias_add(%122, %v_param_140, axis=-1); + %124 = nn.batch_norm(%123, %v_param_141, %v_param_142, %v_param_143, %v_param_144, axis=3, epsilon=1.001e-05f); + %125 = %124.0; + %126 = add(%111, %125); + %127 
= nn.relu(%126); + %128 = nn.conv2d(%127, %v_param_157, strides=[2, 2], padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %129 = nn.bias_add(%128, %v_param_158, axis=-1); + %130 = nn.batch_norm(%129, %v_param_161, %v_param_162, %v_param_163, %v_param_164, axis=3, epsilon=1.001e-05f); + %131 = nn.conv2d(%127, %v_param_145, strides=[2, 2], padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %132 = nn.bias_add(%131, %v_param_146, axis=-1); + %133 = nn.batch_norm(%132, %v_param_147, %v_param_148, %v_param_149, %v_param_150, axis=3, epsilon=1.001e-05f); + %134 = %133.0; + %135 = nn.relu(%134); + %136 = nn.conv2d(%135, %v_param_151, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %137 = nn.bias_add(%136, %v_param_152, axis=-1); + %138 = nn.batch_norm(%137, %v_param_153, %v_param_154, %v_param_155, %v_param_156, axis=3, epsilon=1.001e-05f); + %139 = %138.0; + %140 = nn.relu(%139); + %141 = nn.conv2d(%140, %v_param_159, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %142 = nn.bias_add(%141, %v_param_160, axis=-1); + %143 = nn.batch_norm(%142, %v_param_165, %v_param_166, %v_param_167, %v_param_168, axis=3, epsilon=1.001e-05f); + %144 = %130.0; + %145 = %143.0; + %146 = add(%144, %145); + %147 = nn.relu(%146); + %148 = nn.conv2d(%147, %v_param_169, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %149 = nn.bias_add(%148, %v_param_170, axis=-1); + %150 = nn.batch_norm(%149, %v_param_171, %v_param_172, %v_param_173, %v_param_174, axis=3, epsilon=1.001e-05f); + %151 = %150.0; + %152 = nn.relu(%151); + %153 = nn.conv2d(%152, %v_param_175, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %154 = nn.bias_add(%153, %v_param_176, axis=-1); + %155 = 
nn.batch_norm(%154, %v_param_177, %v_param_178, %v_param_179, %v_param_180, axis=3, epsilon=1.001e-05f); + %156 = %155.0; + %157 = nn.relu(%156); + %158 = nn.conv2d(%157, %v_param_181, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %159 = nn.bias_add(%158, %v_param_182, axis=-1); + %160 = nn.batch_norm(%159, %v_param_183, %v_param_184, %v_param_185, %v_param_186, axis=3, epsilon=1.001e-05f); + %161 = %160.0; + %162 = add(%147, %161); + %163 = nn.relu(%162); + %164 = nn.conv2d(%163, %v_param_187, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %165 = nn.bias_add(%164, %v_param_188, axis=-1); + %166 = nn.batch_norm(%165, %v_param_189, %v_param_190, %v_param_191, %v_param_192, axis=3, epsilon=1.001e-05f); + %167 = %166.0; + %168 = nn.relu(%167); + %169 = nn.conv2d(%168, %v_param_193, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %170 = nn.bias_add(%169, %v_param_194, axis=-1); + %171 = nn.batch_norm(%170, %v_param_195, %v_param_196, %v_param_197, %v_param_198, axis=3, epsilon=1.001e-05f); + %172 = %171.0; + %173 = nn.relu(%172); + %174 = nn.conv2d(%173, %v_param_199, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %175 = nn.bias_add(%174, %v_param_200, axis=-1); + %176 = nn.batch_norm(%175, %v_param_201, %v_param_202, %v_param_203, %v_param_204, axis=3, epsilon=1.001e-05f); + %177 = %176.0; + %178 = add(%163, %177); + %179 = nn.relu(%178); + %180 = nn.conv2d(%179, %v_param_205, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %181 = nn.bias_add(%180, %v_param_206, axis=-1); + %182 = nn.batch_norm(%181, %v_param_207, %v_param_208, %v_param_209, %v_param_210, axis=3, epsilon=1.001e-05f); + %183 = %182.0; + %184 = nn.relu(%183); + %185 = nn.conv2d(%184, %v_param_211, padding=[1i64, 1i64, 1i64, 
1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %186 = nn.bias_add(%185, %v_param_212, axis=-1); + %187 = nn.batch_norm(%186, %v_param_213, %v_param_214, %v_param_215, %v_param_216, axis=3, epsilon=1.001e-05f); + %188 = %187.0; + %189 = nn.relu(%188); + %190 = nn.conv2d(%189, %v_param_217, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %191 = nn.bias_add(%190, %v_param_218, axis=-1); + %192 = nn.batch_norm(%191, %v_param_219, %v_param_220, %v_param_221, %v_param_222, axis=3, epsilon=1.001e-05f); + %193 = %192.0; + %194 = add(%179, %193); + %195 = nn.relu(%194); + %196 = nn.conv2d(%195, %v_param_223, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %197 = nn.bias_add(%196, %v_param_224, axis=-1); + %198 = nn.batch_norm(%197, %v_param_225, %v_param_226, %v_param_227, %v_param_228, axis=3, epsilon=1.001e-05f); + %199 = %198.0; + %200 = nn.relu(%199); + %201 = nn.conv2d(%200, %v_param_229, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %202 = nn.bias_add(%201, %v_param_230, axis=-1); + %203 = nn.batch_norm(%202, %v_param_231, %v_param_232, %v_param_233, %v_param_234, axis=3, epsilon=1.001e-05f); + %204 = %203.0; + %205 = nn.relu(%204); + %206 = nn.conv2d(%205, %v_param_235, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %207 = nn.bias_add(%206, %v_param_236, axis=-1); + %208 = nn.batch_norm(%207, %v_param_237, %v_param_238, %v_param_239, %v_param_240, axis=3, epsilon=1.001e-05f); + %209 = %208.0; + %210 = add(%195, %209); + %211 = nn.relu(%210); + %212 = nn.conv2d(%211, %v_param_241, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %213 = nn.bias_add(%212, %v_param_242, axis=-1); + %214 = nn.batch_norm(%213, %v_param_243, %v_param_244, %v_param_245, 
%v_param_246, axis=3, epsilon=1.001e-05f); + %215 = %214.0; + %216 = nn.relu(%215); + %217 = nn.conv2d(%216, %v_param_247, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %218 = nn.bias_add(%217, %v_param_248, axis=-1); + %219 = nn.batch_norm(%218, %v_param_249, %v_param_250, %v_param_251, %v_param_252, axis=3, epsilon=1.001e-05f); + %220 = %219.0; + %221 = nn.relu(%220); + %222 = nn.conv2d(%221, %v_param_253, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %223 = nn.bias_add(%222, %v_param_254, axis=-1); + %224 = nn.batch_norm(%223, %v_param_255, %v_param_256, %v_param_257, %v_param_258, axis=3, epsilon=1.001e-05f); + %225 = %224.0; + %226 = add(%211, %225); + %227 = nn.relu(%226); + %228 = nn.conv2d(%227, %v_param_271, strides=[2, 2], padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %229 = nn.bias_add(%228, %v_param_272, axis=-1); + %230 = nn.batch_norm(%229, %v_param_275, %v_param_276, %v_param_277, %v_param_278, axis=3, epsilon=1.001e-05f); + %231 = nn.conv2d(%227, %v_param_259, strides=[2, 2], padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %232 = nn.bias_add(%231, %v_param_260, axis=-1); + %233 = nn.batch_norm(%232, %v_param_261, %v_param_262, %v_param_263, %v_param_264, axis=3, epsilon=1.001e-05f); + %234 = %233.0; + %235 = nn.relu(%234); + %236 = nn.conv2d(%235, %v_param_265, padding=[1i64, 1i64, 1i64, 1i64], channels=512, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %237 = nn.bias_add(%236, %v_param_266, axis=-1); + %238 = nn.batch_norm(%237, %v_param_267, %v_param_268, %v_param_269, %v_param_270, axis=3, epsilon=1.001e-05f); + %239 = %238.0; + %240 = nn.relu(%239); + %241 = nn.conv2d(%240, %v_param_273, padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %242 = 
nn.bias_add(%241, %v_param_274, axis=-1); + %243 = nn.batch_norm(%242, %v_param_279, %v_param_280, %v_param_281, %v_param_282, axis=3, epsilon=1.001e-05f); + %244 = %230.0; + %245 = %243.0; + %246 = add(%244, %245); + %247 = nn.relu(%246); + %248 = nn.conv2d(%247, %v_param_283, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %249 = nn.bias_add(%248, %v_param_284, axis=-1); + %250 = nn.batch_norm(%249, %v_param_285, %v_param_286, %v_param_287, %v_param_288, axis=3, epsilon=1.001e-05f); + %251 = %250.0; + %252 = nn.relu(%251); + %253 = nn.conv2d(%252, %v_param_289, padding=[1i64, 1i64, 1i64, 1i64], channels=512, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %254 = nn.bias_add(%253, %v_param_290, axis=-1); + %255 = nn.batch_norm(%254, %v_param_291, %v_param_292, %v_param_293, %v_param_294, axis=3, epsilon=1.001e-05f); + %256 = %255.0; + %257 = nn.relu(%256); + %258 = nn.conv2d(%257, %v_param_295, padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %259 = nn.bias_add(%258, %v_param_296, axis=-1); + %260 = nn.batch_norm(%259, %v_param_297, %v_param_298, %v_param_299, %v_param_300, axis=3, epsilon=1.001e-05f); + %261 = %260.0; + %262 = add(%247, %261); + %263 = nn.relu(%262); + %264 = nn.conv2d(%263, %v_param_301, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %265 = nn.bias_add(%264, %v_param_302, axis=-1); + %266 = nn.batch_norm(%265, %v_param_303, %v_param_304, %v_param_305, %v_param_306, axis=3, epsilon=1.001e-05f); + %267 = %266.0; + %268 = nn.relu(%267); + %269 = nn.conv2d(%268, %v_param_307, padding=[1i64, 1i64, 1i64, 1i64], channels=512, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO"); + %270 = nn.bias_add(%269, %v_param_308, axis=-1); + %271 = nn.batch_norm(%270, %v_param_309, %v_param_310, %v_param_311, %v_param_312, axis=3, epsilon=1.001e-05f); + %272 = %271.0; + %273 = 
nn.relu(%272); + %274 = nn.conv2d(%273, %v_param_313, padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO"); + %275 = nn.bias_add(%274, %v_param_314, axis=-1); + %276 = nn.batch_norm(%275, %v_param_315, %v_param_316, %v_param_317, %v_param_318, axis=3, epsilon=1.001e-05f); + %277 = %276.0; + %278 = add(%263, %277); + %279 = nn.relu(%278); + %280 = nn.global_avg_pool2d(%279, layout="NHWC"); + %281 = nn.batch_flatten(%280); + %282 = nn.dense(%281, %v_param_319, units=1000); + %283 = nn.bias_add(%282, %v_param_320); + nn.softmax(%283) +} +``` + +# 自动调优 +现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html) + +```python +rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1") +rpc_tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190)) +rpc_key = "android" +rpc_tracker = rpc_tracker_host + ":" + str(rpc_tracker_port) + +# 自动调优是计算密集型和耗时的任务。 +# 它在上述配置中被设置为 False,因为此脚本在 x86 上运行以进行演示。 +# 请将 :code:`is_tuning` 设置为 True 以启用自动调优。 + +# 此外,:code:`test_target` 设置为 :code:`llvm`,以使此示例与 x86 演示兼容。 +# 请在上述配置中将其更改为 :code:`opencl` 或 :code:`opencl -device=adreno` 以用于 RPC 目标。 + +if is_tuning: + tvmc.tune( + tvmc_model, + target=target, + tuning_records=tune_log, + target_host=target_host, + hostname=rpc_tracker_host, + port=rpc_tracker_port, + rpc_key=rpc_key, + tuner="xgb", + repeat=30, + trials=3, + early_stopping=0, + ) +``` + +# 编译 +编译以生成 TVM 产物 + +```python +# 此生成的示例在我们的 x86 服务器上运行以进行演示。 +# 要通过 RPC 在真实目标上部署和调优,请在上述配置部分将 :code:`local_demo` 设置为 False。 + +# OpenCLML 卸载将尝试通过使用 OpenCLML 专有运算符库加速受支持的运算符。 +# 默认情况下,在上述配置部分 :code:`enable_clml` 设置为 False。 + +if not enable_clml: + if local_demo: + tvmc_package = tvmc.compile( + tvmc_model, + target=target, + ) + else: + tvmc_package = tvmc.compile( + tvmc_model, + target=target, + target_host=target_host, + cross=cross_compiler, + tuning_records=tune_log, + ) +else: + # 或者,我们可以保存编译输出并将其保存为 TVMCPackage。 + # 
这种方式无需再次编译即可加载已编译的模块。 + target = target + ", clml" + pkg_path = tmp_path.relpath("keras-resnet50.tar") + tvmc.compile( + tvmc_model, + target=target, + target_host=target_host, + cross=cross_compiler, + tuning_records=tune_log, + package_path=pkg_path, + ) + + # 加载已编译的包 + tvmc_package = TVMCPackage(package_path=pkg_path) + +# tvmc_package 包括 tvmc_package.lib_path, tvmc_package.graph, tvmc_package.params +# 已保存的 TVMCPackage 实际上是 mod.so、mod.json 和 mod.params 的 tar 存档。 +``` + +# 部署和运行 +通过让 tvmc 使用随机数据填充输入在 RPC 上部署和运行已编译的模型。 + +```python +# 在 RPC 设置上运行 +if local_demo: + result = tvmc.run(tvmc_package, device="cpu", fill_mode="random") +else: + result = tvmc.run( + tvmc_package, + device="cl", + rpc_key=rpc_key, + hostname=rpc_tracker_host, + port=rpc_tracker_port, + fill_mode="random", + ) + +# result 是输出的字典。 +print("Result:", result) +``` + +Out: +```python +Result: [] +Output Names: + ['output_0'] +``` \ No newline at end of file From 2d3e6aed308a5281a0b88f0ac721796378c602bf Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Thu, 30 Nov 2023 23:11:32 +0800 Subject: [PATCH 02/15] alter url --- docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md index af9de6f1..74b3a9cb 100644 --- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md +++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md @@ -20,7 +20,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型 此外,您应该已经为 Android 构建了 TVM 。请参阅以下说明,了解如何构建它并设置 RPC 环境。 -[在 Adreno GPU 上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html) +[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) ```python import os @@ -407,7 +407,7 @@ def @main(%input_2: Tensor[(1, 224, 224, 3), float32], %v_param_1: Tensor[(7, 7, ``` # 自动调优 -现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 
上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html) +现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) ```python rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1") From d9a20dfe86f78ab9d6af1799ca28e7b628cf9325 Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Thu, 30 Nov 2023 23:42:18 +0800 Subject: [PATCH 03/15] translate depoly_adreno --- .../deploy/deploy_models/10-depoly_adreno.md | 467 ++++++++++++++++++ 1 file changed, 467 insertions(+) diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md index e69de29b..d4a97d43 100644 --- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md +++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md @@ -0,0 +1,467 @@ +--- +title: 在 Adreno™ 上部署预训练模型 +--- + +# 在 Adreno™ 上部署预训练模型 +**作者**: Daniil Barinov, Siva Rama Krishna +:::note +单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_model_on_adreno.html#sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-py) 下载完整的示例代码 +::: + +本文是一个逐步教程,演示如何在 Adreno 上(不同精度)部署预训练的 PyTorch ResNet-18 模型。 + +首先,我们需要安装 PyTorch 与 TorchVision ,因为我们将使用它作为我们的模型库。 + +可以通过 pip 快速安装: + +```bash +pip install torch +pip install torchvision +``` + +除此之外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它。 + +[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) + +在构建部分之后,构建目录中应该有两个文件: “libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。 + +## TVM RPC 服务器 + +要获取设备的哈希值,请使用: + +```bash +adb devices +``` + +如果您的计算机连接了多个设备,请设置要使用的 Android 设备: + +```bash +export ANDROID_SERIAL=<device-hash> +``` + +然后,要将这两个文件上传到设备上,应该使用: + +```bash +adb push {libtvm_runtime.so,tvm_rpc} /data/local/tmp +``` + +此时,您的设备上的路径 /data/local/tmp 将有 “libtvm_runtime.so” 和 “tvm_rpc”。有时 cmake 找不到 “libc++_shared.so”。使用: + +```bash +find ${ANDROID_NDK_HOME} -name libc++_shared.so +``` + +找到它,并使用 adb 将其推送到所需的设备: + +```bash +adb push libc++_shared.so 
/data/local/tmp +``` + +我们现在准备运行 TVM RPC 服务器。在第一个控制台中使用以下行启动 rpc_tracker: + +```bash +python3 -m tvm.exec.rpc_tracker --port 9190 +``` + +然后,我们需要在第二个控制台中从所需的设备下运行 tvm_rpc 服务器: + +```bash +adb reverse tcp:9190 tcp:9190 +adb forward tcp:5000 tcp:5000 +adb forward tcp:5002 tcp:5001 +adb forward tcp:5003 tcp:5002 +adb forward tcp:5004 tcp:5003 +adb shell LD_LIBRARY_PATH=/data/local/tmp /data/local/tmp/tvm_rpc server --host=0.0.0.0 --port=5000 --tracker=127.0.0.1:9190 --key=android --port-end=5100 +``` + +在编译和推断模型之前,请指定 TVM_TRACKER_HOST 和 TVM_TRACKER_PORT: + +```bash +export TVM_TRACKER_HOST=0.0.0.0 +export TVM_TRACKER_PORT=9190 +``` + +检查 tracker 是否正在运行,并且设备是否可用: + +```bash +python -m tvm.exec.query_rpc_tracker --port 9190 +``` + +例如,如果有 1 个 Android 设备,输出可能是: + +```info +Queue Status +---------------------------------- +key total free pending +---------------------------------- +android 1 1 0 +---------------------------------- +``` + +## 配置 +```python +import os +import torch +import torchvision +import tvm +from tvm import te +from tvm import relay, rpc +from tvm.contrib import utils, ndk +from tvm.contrib import graph_executor +from tvm.relay.op.contrib import clml +from tvm import autotvm + +# 下面是一组配置,用于控制脚本的行为,如本地运行或设备运行、目标定义、dtype 设置和自动调优启用。 +# 如有需要,请根据需要更改这些设置。 + +# 与 float32 相比,Adreno 设备对 float16 的效率更高 +# 鉴于降低精度不会影响预期输出 +# 建议使用较低的精度。 +# 我们有一个辅助 API,使精度转换变得简单 +# 它支持 "float16" 和 "float16_acc32" 模式的 dtype。 +# 让我们选择 "float16" 进行计算和 "float32" 进行累积。 + +calculation_dtype = "float16" +acc_dtype = "float32" + +# 在编译以生成纹理之前指定 Adreno 目标 +# 利用内核并获得所有纹理的好处 +# 注意:此生成的示例在我们的 x86 服务器上运行以进行演示。 +# 如果在 Android 设备上运行它,我们需要 +# 指定其指令集。如果要在实际设备上运行此教程,请将 :code:`local_demo` 设置为 False。 +local_demo = True + +# 默认情况下,在 CPU 目标上执行。 +# 选择 'cpu'、'opencl' 和 'opencl -device=adreno' +test_target = "cpu" + +# 更改目标配置。 +# 运行 `adb shell cat /proc/cpuinfo` 以查找架构。 +arch = "arm64" +target = tvm.target.Target("llvm -mtriple=%s-linux-android" % arch) + +# 自动调整是计算密集型和耗时的任务, +# 因此默认情况下禁用。如果需要,请启用它。 
+is_tuning = False +tune_log = "adreno-resnet18.log" + +# 启用 OpenCLML 加速运算符库。 +enable_clml = False + +``` + +## 获取 PyTorch 模型 +从 torchvision models 获取 resnet18 + +```python +model_name = "resnet18" +model = getattr(torchvision.models, model_name)(pretrained=True) +model = model.eval() + +# 通过追踪抓取 TorchScripted 模型 +input_shape = [1, 3, 224, 224] +input_data = torch.randn(input_shape) +scripted_model = torch.jit.trace(model, input_data).eval() + +``` + +Out: +```info +/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights. 
+ warnings.warn(msg) + +``` + +## 加载测试图片 +我们使用一张经典的来自 ImageNet 的猫图片作为示例 + +```python +from PIL import Image +from tvm.contrib.download import download_testdata +from matplotlib import pyplot as plt +import numpy as np + +img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true" +img_path = download_testdata(img_url, "cat.png", module="data") +img = Image.open(img_path).resize((224, 224)) +plt.imshow(img) +plt.show() + +# 处理图片并转换为 tensor +from torchvision import transforms + +my_preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] +) +img = my_preprocess(img) +img = np.expand_dims(img, 0) + +``` + +![cat](https://tvm.apache.org/docs/_images/sphx_glr_from_keras_001.png) + +## 将 PyTorch 模型转换为 Relay 模块 +TVM 具有用于各种框架 的在 relay.frontend 中的前端 API 。现在对于 PyTorch 模型导入,我们有 relay.frontend.from_pytorch API 。输入名称可以是任意的 + +```python +input_name = "input0" +shape_list = [(input_name, img.shape)] + +mod, params = relay.frontend.from_pytorch(scripted_model, shape_list) + +``` + +Out: +```info +/workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead. + return LooseVersion(torch_ver) > ver +/venv/apache-tvm-py3.8/lib/python3.8/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead. 
+ other = LooseVersion(other) +``` + +## 精度 +```python +# Adreno 设备在 float16 上的效率比 float32 高 +# 鉴于降低精度不会影响预期输出 +# 建议使用较低的精度。 + +# TVM 通过 ToMixedPrecision 转换过程支持混合精度。 +# 我们可能需要注册精度规则,比如精度类型、累加 +# 数据类型等,以覆盖默认设置。 +# 下面的辅助 API 简化了模块间的精度转换。 + +# 在上面的配置部分,计算 dtype 设置为 "float16",累积 dtype 设置为 "float32"。 + +from tvm.driver.tvmc.transform import apply_graph_transforms + +mod = apply_graph_transforms( + mod, + { + "mixed_precision": True, + "mixed_precision_ops": ["nn.conv2d", "nn.dense"], + "mixed_precision_calculation_type": calculation_dtype, + "mixed_precision_acc_type": acc_dtype, + }, +) + +``` + +正如您在 IR 中所看到的那样,该架构现在包含强制转换操作,这些操作是为了将精度转换为 FP16 。您还可以使用 "float16" 或 "float32" 作为其他 dtype 选项。 + +## 准备 TVM 目标 + +```python +# 此生成的示例在我们的 x86 服务器上运行以进行演示。 + +# 要在真实目标上部署并调优,请在上面的配置部分将 :code:`local_demo` 设置为 False。 +# 同样,:code:`test_target` 设置为 :code:`llvm`,以使其与 x86 演示兼容。 +# 请将其更改为 :code:`opencl` 或 :code:`opencl -device=adreno`,以用于上面配置中的 RPC 目标。 + + +if local_demo: + target = tvm.target.Target("llvm") +elif test_target.find("opencl"): + target = tvm.target.Target(test_target, host=target) + +``` + +## 自动调优 +下面的几个指令可以使用 XGBoost 作为调优算法对 Relay 模块进行自动调优。 + +```python +# 自动调优过程包括提取任务、定义调优配置和 +# 为每个任务调整最佳性能的内核配置。 + +# 获取与 RPC 相关的设置。 +rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1") +rpc_tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190)) +key = "android" + +# 自动调优是计算密集型和耗时的任务。 +# 在上面的配置中,由于此脚本在 x86 上运行进行演示,设置为 False。 +# 请将 :code:`is_tuning` 设置为 True 以启用自动调优。 + +if is_tuning: + # 自动调优阶段 1:提取可调优任务 + tasks = autotvm.task.extract_from_program( + mod, target=test_target, target_host=target, params=params + ) + + # 自动调优阶段 2:定义调优配置 + tmp_log_file = tune_log + ".tmp" + measure_option = autotvm.measure_option( + builder=autotvm.LocalBuilder( + build_func=ndk.create_shared, timeout=15 + ), # 在本地构建测试内核 + runner=autotvm.RPCRunner( # 运行程序将在远程设备上运行。 + key, # RPC 密钥 + host=rpc_tracker_host, # 追踪主机 + port=int(rpc_tracker_port), # 追踪端口 + number=3, # 平均运行次数 + timeout=600, # 
RPC 超时 + ), + ) + n_trial = 1024 # 在选择最佳内核配置之前进行训练的迭代次数 + early_stopping = False # 可以启用,以便在损失不再降低时停止调优。 + + # 自动调优阶段 3:遍历任务并进行调优。 + from tvm.autotvm.tuner import XGBTuner + + for i, tsk in enumerate(reversed(tasks[:3])): + print("Task:", tsk) + prefix = "[Task %2d/%2d] " % (i + 1, len(tasks)) + + # 选择调谐器 + tuner = "xgb" + + # 创建调谐器 + if tuner == "xgb": + tuner_obj = XGBTuner(tsk, loss_type="reg") + # 其他调谐器类型的判断可以在此处添加 + + tsk_trial = min(n_trial, len(tsk.config_space)) + tuner_obj.tune( + n_trial=tsk_trial, + early_stopping=early_stopping, + measure_option=measure_option, + callbacks=[ + autotvm.callback.progress_bar(tsk_trial, prefix=prefix), + autotvm.callback.log_to_file(tmp_log_file), + ], + ) + # 自动调优阶段 4:从整体日志中选择性能最佳的配置。 + autotvm.record.pick_best(tmp_log_file, tune_log) + +``` + +## 启用 OpenCLML 卸载 +OpenCLML 卸载将尝试通过使用 OpenCLML 专有运算符库来加速支持的运算符。 + +```python +# 默认情况下,在上面的配置部分,:code:`enable_clml` 被设置为 False。 + +if not local_demo and enable_clml: + mod = clml.partition_for_clml(mod, params) + +``` + +## 编译 +如果存在调优缓存,则使用调优缓存。 + +```python +if os.path.exists(tune_log): + with autotvm.apply_history_best(tune_log): + with tvm.transform.PassContext(opt_level=3): + lib = relay.build(mod, target=target, params=params) +else: + with tvm.transform.PassContext(opt_level=3): + lib = relay.build(mod, target=target, params=params) + +``` + +## 远程通过 RPC 部署模型 +使用 RPC,您可以将模型从主机部署到远程 Adreno 设备。 + +```python +if local_demo: + remote = rpc.LocalSession() +else: + tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port) + # 运行大模型时, 应该增加 `session_timeout` + remote = tracker.request(key, priority=0, session_timeout=60) + +if local_demo: + dev = remote.cpu(0) +elif test_target.find("opencl"): + dev = remote.cl(0) +else: + dev = remote.cpu(0) + +temp = utils.tempdir() +dso_binary = "dev_lib_cl.so" +dso_binary_path = temp.relpath(dso_binary) +fcompile = ndk.create_shared if not local_demo else None +lib.export_library(dso_binary_path, fcompile=fcompile) +remote_path = 
"/data/local/tmp/" + dso_binary +remote.upload(dso_binary_path) +rlib = remote.load_module(dso_binary) +m = graph_executor.GraphModule(rlib["default"](dev)) + +``` + +## 运行推理 +我们现在可以设置输入,推理我们的模型并得到输出预测。 + +```python +m.set_input(input_name, tvm.nd.array(img.astype("float32"))) +m.run() +tvm_output = m.get_output(0) +``` + +## 获取预测与性能统计 +这块代码展示了 top-1 和 top-5 预测,同时提供模型的性能信息。 + +```python +from os.path import join, isfile +from matplotlib import pyplot as plt +from tvm.contrib import download + + +# 下载 ImageNet 分类 +categ_url = "https://github.com/uwsampl/web-data/raw/main/vta/models/" +categ_fn = "synset.txt" +download.download(join(categ_url, categ_fn), categ_fn) +synset = eval(open(categ_fn).read()) + +top_categories = np.argsort(tvm_output.asnumpy()[0]) +top5 = np.flip(top_categories, axis=0)[:5] + +# 记录 top-1 分类结果 +print("Top-1 id: {}, class name: {}".format(top5[1 - 1], synset[top5[1 - 1]])) + +# 记录 top-5 分类结果 +print("\nTop5 predictions: \n") +print("\t#1:", synset[top5[1 - 1]]) +print("\t#2:", synset[top5[2 - 1]]) +print("\t#3:", synset[top5[3 - 1]]) +print("\t#4:", synset[top5[4 - 1]]) +print("\t#5:", synset[top5[5 - 1]]) +print("\t", top5) +ImageNetClassifier = False +for k in top_categories[-5:]: + if "cat" in synset[k]: + ImageNetClassifier = True +assert ImageNetClassifier, "Failed ImageNet classifier validation check" + +print("Evaluate inference time cost...") +print(m.benchmark(dev, number=1, repeat=10)) +``` + +Out: +```info +/workspace/python/tvm/runtime/ndarray.py:199: DeprecationWarning: NDArray.asnumpy() will be deprecated in TVM v0.8 release. Please use NDArray.numpy() instead. + warnings.warn( +Top-1 id: 281, class name: tabby, tabby cat + +Top5 predictions: + + #1: tabby, tabby cat + #2: tiger cat + #3: lynx, catamount + #4: red fox, Vulpes vulpes + #5: Egyptian cat + [281 282 287 277 285] +Evaluate inference time cost... 
+Execution time summary: + mean (ms) median (ms) max (ms) min (ms) std (ms) + 3991.4967 3991.2103 3996.6988 3988.8485 2.0989 +``` + +**该脚本的总运行时间:** ( 1 分 18.970 秒) \ No newline at end of file From 5ff68e40ed6ac09ead5b8e2370645166182d8c13 Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Thu, 30 Nov 2023 23:57:03 +0800 Subject: [PATCH 04/15] update deploy_models --- docs/how_to/deploy/deploy_models/01-deploy_android.md | 6 +++--- docs/how_to/deploy/deploy_models/04-compile_od.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/how_to/deploy/deploy_models/01-deploy_android.md b/docs/how_to/deploy/deploy_models/01-deploy_android.md index 769b320d..d8a259fa 100644 --- a/docs/how_to/deploy/deploy_models/01-deploy_android.md +++ b/docs/how_to/deploy/deploy_models/01-deploy_android.md @@ -107,11 +107,11 @@ endif # 要添加的其他 include 头,例如 SDK_PATH/adrenosdk/Development/Inc ADD_C_INCLUDES += /work/adrenosdk-linux-5_0/Development/Inc -# 从 https://github.com/KhronosGroup/OpenCL-Headers 下载 -ADD_C_INCLUDES += /usr/local/OpenCL-Headers/ + +ADD_C_INCLUDES = # 要添加的附加链接库,例如 ANDROID_LIB_PATH/libOpenCL.so -ADD_LDLIBS = /workspace/pull-from-android-device/libOpenCL.so +ADD_LDLIBS = ``` :::note diff --git a/docs/how_to/deploy/deploy_models/04-compile_od.md b/docs/how_to/deploy/deploy_models/04-compile_od.md index 040a89af..d804d170 100644 --- a/docs/how_to/deploy/deploy_models/04-compile_od.md +++ b/docs/how_to/deploy/deploy_models/04-compile_od.md @@ -15,8 +15,8 @@ title: 编译 PyTorch 目标检测模型 可通过 pip 快速安装: ``` bash -pip install torch==1.7.0 -pip install torchvision==0.8.1 +pip install torch +pip install torchvision ``` 或参考官网:https://pytorch.org/get-started/locally/ From f01c813d3cbe37ef41b2ac2ff7459efaa1ca7a3b Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 00:02:21 +0800 Subject: [PATCH 05/15] delete deploy_ssd and update index --- .../deploy/deploy_models/09-deploy_ssd.md | 162 ------------------ 
docs/how_to/deploy/deploy_models/index.md | 3 +- 2 files changed, 2 insertions(+), 163 deletions(-) delete mode 100644 docs/how_to/deploy/deploy_models/09-deploy_ssd.md diff --git a/docs/how_to/deploy/deploy_models/09-deploy_ssd.md b/docs/how_to/deploy/deploy_models/09-deploy_ssd.md deleted file mode 100644 index eee3d9df..00000000 --- a/docs/how_to/deploy/deploy_models/09-deploy_ssd.md +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: 部署 Single Shot Multibox Detector(SSD)模型 ---- - -# 部署 Single Shot Multibox Detector(SSD)模型 - -:::note -单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_ssd_gluoncv.html#sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py) 下载完整的示例代码 -::: - -**作者**:[Yao Wang](https://github.com/kevinthesun),[Leyuan Wang](https://github.com/Laurawly) - -本文介绍如何用 TVM 部署 SSD 模型。这里将使用 GluonCV 预训练的 SSD 模型,并将其转换为 Relay IR。 - -``` python -import tvm -from tvm import te - -from matplotlib import pyplot as plt -from tvm import relay -from tvm.contrib import graph_executor -from tvm.contrib.download import download_testdata -from gluoncv import model_zoo, data, utils -``` - -输出结果: - -``` bash -/usr/local/lib/python3.7/dist-packages/gluoncv/__init__.py:40: UserWarning: Both `mxnet==1.6.0` and `torch==1.11.0+cpu` are installed. You might encounter increased GPU memory footprint if both framework are used at the same time. - warnings.warn(f'Both `mxnet=={mx.__version__}` and `torch=={torch.__version__}` are installed. 
' -``` - -## 初步参数设置 - -:::note -现在支持在 CPU 和 GPU 上编译 SSD。 - -为取得 CPU 上的最佳推理性能,需要根据设备修改 target 参数——对于 x86 CPU:参考 [为 x86 CPU 自动调整卷积网络](/docs/how_to/autotune/autotuning_x86) 来调整;对于 arm CPU:参考 [为 ARM CPU 自动调整卷积网络](/docs/how_to/autotune/autotuning_arm) 来调整。 - -为在 Intel 显卡上取得最佳推理性能,将 target 参数修改为 `opencl -device=intel_graphics` 。注意:在 Mac 上使用 Intel 显卡时,target 要设置为 `opencl` ,因为 Mac 上不支持 Intel 子组扩展。 - -为取得基于 CUDA 的 GPU 上的最佳推理性能,将 target 参数修改为 `cuda`;对于基于 OPENCL 的 GPU,将 target 参数修改为 `opencl`,然后根据设备来修改设备参数。 -::: - -``` python -supported_model = [ - "ssd_512_resnet50_v1_voc", - "ssd_512_resnet50_v1_coco", - "ssd_512_resnet101_v2_voc", - "ssd_512_mobilenet1.0_voc", - "ssd_512_mobilenet1.0_coco", - "ssd_300_vgg16_atrous_voc" "ssd_512_vgg16_atrous_coco", -] - -model_name = supported_model[0] -dshape = (1, 3, 512, 512) -``` - -下载并预处理 demo 图像: - -``` python -im_fname = download_testdata( - "https://github.com/dmlc/web-data/blob/main/" + "gluoncv/detection/street_small.jpg?raw=true", - "street_small.jpg", - module="data", -) -x, img = data.transforms.presets.ssd.load_test(im_fname, short=512) -``` - -为 CPU 转换和编译模型: - -``` python -block = model_zoo.get_model(model_name, pretrained=True) - -def build(target): - mod, params = relay.frontend.from_mxnet(block, {"data": dshape}) - with tvm.transform.PassContext(opt_level=3): - lib = relay.build(mod, target, params=params) - return lib -``` - -输出结果: - -``` bash -/usr/local/lib/python3.7/dist-packages/mxnet/gluon/block.py:1389: UserWarning: Cannot decide type for the following arguments. Consider providing them as input: - data: None - input_sym_arg_type = in_param.infer_type()[0] -Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip... 
- - 0%| | 0/132723 [00:00 Date: Fri, 1 Dec 2023 00:25:26 +0800 Subject: [PATCH 06/15] Update docs/how_to/deploy/deploy_models/10-depoly_adreno.md Co-authored-by: sparanoid --- docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md index d4a97d43..6bf7e75b 100644 --- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md +++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md @@ -10,7 +10,7 @@ title: 在 Adreno™ 上部署预训练模型 本文是一个逐步教程,演示如何在 Adreno 上(不同精度)部署预训练的 PyTorch ResNet-18 模型。 -首先,我们需要安装 PyTorch 与 TorchVision ,因为我们将使用它作为我们的模型库。 +首先,我们需要安装 PyTorch 与 TorchVision,因为我们将使用它作为我们的模型库。 可以通过 pip 快速安装: From c2d8c7849f0bc1e32659ad00fecffe96d3956eeb Mon Sep 17 00:00:00 2001 From: Anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 00:25:34 +0800 Subject: [PATCH 07/15] Update docs/how_to/deploy/deploy_models/10-depoly_adreno.md Co-authored-by: sparanoid --- docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md index 6bf7e75b..a66a3205 100644 --- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md +++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md @@ -23,7 +23,7 @@ pip install torchvision [在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) -在构建部分之后,构建目录中应该有两个文件: “libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。 +在构建部分之后,构建目录中应该有两个文件:“libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。 ## TVM RPC 服务器 From 0ab1365ead428e87f1d126d4dc4269042b78ba86 Mon Sep 17 00:00:00 2001 From: Anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 00:25:41 +0800 Subject: [PATCH 08/15] Update docs/how_to/deploy/deploy_models/10-depoly_adreno.md Co-authored-by: sparanoid --- 
docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md index a66a3205..1ee01b0a 100644 --- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md +++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md @@ -208,7 +208,7 @@ img = np.expand_dims(img, 0) ![cat](https://tvm.apache.org/docs/_images/sphx_glr_from_keras_001.png) ## 将 PyTorch 模型转换为 Relay 模块 -TVM 具有用于各种框架 的在 relay.frontend 中的前端 API 。现在对于 PyTorch 模型导入,我们有 relay.frontend.from_pytorch API 。输入名称可以是任意的 +TVM 具有用于各种框架 的在 relay.frontend 中的前端 API。现在对于 PyTorch 模型导入,我们有 relay.frontend.from_pytorch API。输入名称可以是任意的 ```python input_name = "input0" From c8a480aac2180503a1fca7335bf877cf2d21cc36 Mon Sep 17 00:00:00 2001 From: Anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 00:25:49 +0800 Subject: [PATCH 09/15] Update docs/how_to/deploy/deploy_models/10-depoly_adreno.md Co-authored-by: sparanoid --- docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md index 1ee01b0a..dd523854 100644 --- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md +++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md @@ -253,7 +253,7 @@ mod = apply_graph_transforms( ``` -正如您在 IR 中所看到的那样,该架构现在包含强制转换操作,这些操作是为了将精度转换为 FP16 。您还可以使用 "float16" 或 "float32" 作为其他 dtype 选项。 +正如您在 IR 中所看到的那样,该架构现在包含强制转换操作,这些操作是为了将精度转换为 FP16。您还可以使用 "float16" 或 "float32" 作为其他 dtype 选项。 ## 准备 TVM 目标 From 11b6c1f1ae10db49b21d80239e984dd528d5a6fa Mon Sep 17 00:00:00 2001 From: Anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 00:25:55 +0800 Subject: [PATCH 10/15] Update docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md Co-authored-by: sparanoid --- docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md index 74b3a9cb..bcf7a111 100644 --- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md +++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md @@ -14,7 +14,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型 单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_model_on_adreno_tvmc.html#sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-tvmc-py) 下载完整的示例代码 ::: -**作者**: Siva Rama Krishna +**作者**:Siva Rama Krishna 本文是一篇关于在 Adreno™ 上部署预训练 Keras resnet50 模型的逐步教程。 From b1918d69752f34f7592ec38c35eccbeed5eb249f Mon Sep 17 00:00:00 2001 From: Anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 00:26:49 +0800 Subject: [PATCH 11/15] Update docs/how_to/deploy/deploy_models/10-depoly_adreno.md Co-authored-by: sparanoid --- docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md index dd523854..347af753 100644 --- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md +++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md @@ -374,7 +374,7 @@ if local_demo: remote = rpc.LocalSession() else: tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port) - # 运行大模型时, 应该增加 `session_timeout` + # 运行大模型时,应该增加 `session_timeout` remote = tracker.request(key, priority=0, session_timeout=60) if local_demo: From 8ced38074f2938b38d29749191dab4bab7464ce8 Mon Sep 17 00:00:00 2001 From: Anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 00:26:54 +0800 Subject: [PATCH 12/15] Update docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md Co-authored-by: sparanoid --- docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md index bcf7a111..0bf76986 100644 --- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md +++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md @@ -18,7 +18,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型 本文是一篇关于在 Adreno™ 上部署预训练 Keras resnet50 模型的逐步教程。 -此外,您应该已经为 Android 构建了 TVM 。请参阅以下说明,了解如何构建它并设置 RPC 环境。 +此外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它并设置 RPC 环境。 [在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) From b4a0e9b75de279ba0b684bec5236f8c56a7f736f Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 15:18:35 +0800 Subject: [PATCH 13/15] delete # 使用 tvmc 接口在 Adreno™ 上部署预训练模型 @@ -72,7 +66,7 @@ model.save(model_file_name) ``` Out: -```info +```info Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5 8192/102967424 [..............................] 
- ETA: 0s From acc615ffed1d0b0cb7cbbc0f4b8ec90422fd83d3 Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 16:12:00 +0800 Subject: [PATCH 14/15] fix broken links --- docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +- docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md index 347af753..14bf92ea 100644 --- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md +++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md @@ -21,7 +21,7 @@ pip install torchvision 除此之外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它。 -[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) +[在 Adreno GPU 上部署](https://tvm.apache.org/docs/v0.13.0/how_to/deploy/adreno.html) 在构建部分之后,构建目录中应该有两个文件:“libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。 diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md index c6948258..7fa35681 100644 --- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md +++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md @@ -401,7 +401,7 @@ def @main(%input_2: Tensor[(1, 224, 224, 3), float32], %v_param_1: Tensor[(7, 7, ``` # 自动调优 -现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) +现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.apache.org/docs/v0.13.0/how_to/deploy/adreno.html) ```python rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1") From b605b02e9395b063e9b543d2113e944ffdb29451 Mon Sep 17 00:00:00 2001 From: anleeos <2937160075@qq.com> Date: Fri, 1 Dec 2023 16:34:23 +0800 Subject: [PATCH 15/15] fix broken link --- docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +- docs/how_to/deploy/deploy_models/index.md | 4 ++-- 2 
files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md index 7fa35681..16206c81 100644 --- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md +++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md @@ -14,7 +14,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型 此外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它并设置 RPC 环境。 -[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno) +[在 Adreno GPU 上部署](https://tvm.apache.org/docs/v0.13.0/how_to/deploy/adreno.html) ```python import os diff --git a/docs/how_to/deploy/deploy_models/index.md b/docs/how_to/deploy/deploy_models/index.md index 60c3eb04..74c8bd59 100644 --- a/docs/how_to/deploy/deploy_models/index.md +++ b/docs/how_to/deploy/deploy_models/index.md @@ -6,9 +6,9 @@ title: 部署深度学习模型 TVM 可将模型部署到各种不同的平台。以下操作指南描述了如何准备模型,并将其部署到多种支持的后端。 +* [在 Adreno™ 上部署预训练模型](https://tvm.apache.org/docs/v0.13.0/how_to/deploy_models/deploy_model_on_adreno.html#sphx-glr-how-to-deploy-models-deploy-model-on-adreno-py) +* [使用 tvmc 接口在 Adreno™ 上部署预训练模型](https://tvm.apache.org/docs/v0.13.0/how_to/deploy_models/deploy_model_on_adreno_tvmc.html#sphx-glr-how-to-deploy-models-deploy-model-on-adreno-tvmc-py) * [在 Android 上部署预训练模型](deploy_android) -* [在 Adreno™ 上部署预训练模型](deploy_adreno) -* [在 Adreno™ 上部署预训练模型](deploy_adreno_tvmc) * [在 Jetson Nano 上部署预训练模型](deploy_nano) * [在树莓派上部署预训练模型](deploy_pi) * [编译 PyTorch 对象检测模型](compile_od)