From 65fe465bb7620f8fc47f0c5283ea3f2de7296d0f Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Thu, 30 Nov 2023 23:09:13 +0800
Subject: [PATCH 01/15] add files: depoly_adreno_tvmc.md, depoly_adreno.md
---
.../deploy/deploy_models/10-depoly_adreno.md | 0
.../deploy_models/11-depoly_adreno_tvmc.md | 512 ++++++++++++++++++
2 files changed, 512 insertions(+)
create mode 100644 docs/how_to/deploy/deploy_models/10-depoly_adreno.md
create mode 100644 docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
new file mode 100644
index 00000000..e69de29b
diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
new file mode 100644
index 00000000..af9de6f1
--- /dev/null
+++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
@@ -0,0 +1,512 @@
+---
+title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型
+---
+
+
+# 使用 tvmc 接口在 Adreno™ 上部署预训练模型
+
+:::note
+单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_model_on_adreno_tvmc.html#sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-tvmc-py) 下载完整的示例代码
+:::
+
+**作者**: Siva Rama Krishna
+
+本文是一篇关于在 Adreno™ 上部署预训练 Keras resnet50 模型的逐步教程。
+
+此外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建 TVM 并设置 RPC 环境。
+
+[在 Adreno GPU 上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html)
+
+```python
+import os
+import tvm
+import numpy as np
+from tvm import relay
+from tvm.driver import tvmc
+from tvm.driver.tvmc.model import TVMCPackage
+from tvm.contrib import utils
+```
+
+# 配置
+在编译之前指定 Adreno 目标,以便生成对纹理友好的内核,并获得纹理带来的全部好处。注意:此示例在我们的 x86 服务器上运行以进行演示。如果要在 Android 设备上运行它,则需要指定该设备的指令集。如果要在实际设备上通过 RPC 运行本教程,请将 `local_demo` 设置为 False。
+
+```python
+local_demo = True
+
+# 默认情况下,将在 CPU 目标上执行。
+# 可选值为 'llvm'、'opencl' 或 'opencl -device=adreno'
+target = "llvm"
+
+# 更改目标配置。
+# 运行 `adb shell cat /proc/cpuinfo` 以查找架构。
+arch = "arm64"
+target_host = "llvm -mtriple=%s-linux-android" % arch
+
+# 自动调优是一项耗费计算资源且耗时的任务,因此默认情况下禁用。
+# 如果需要,请启用它。
+is_tuning = False
+tune_log = "adreno-resnet50.log"
+
+# 启用 OpenCLML 加速运算符库。
+enable_clml = False
+cross_compiler = (
+ os.getenv("ANDROID_NDK_HOME", "")
+ + "/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang"
+)
+```
+
+# 制作 Keras Resnet50 模型
+```python
+from tensorflow.keras.applications.resnet50 import ResNet50
+
+tmp_path = utils.tempdir()
+model_file_name = tmp_path.relpath("resnet50.h5")
+
+model = ResNet50(include_top=True, weights="imagenet", input_shape=(224, 224, 3), classes=1000)
+model.save(model_file_name)
+```
+
+Out:
+```info
+Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
+
+ 8192/102967424 [..............................] - ETA: 0s
+ 7208960/102967424 [=>............................] - ETA: 0s
+ 8380416/102967424 [=>............................] - ETA: 1s
+ 16769024/102967424 [===>..........................] - ETA: 1s
+ 23412736/102967424 [=====>........................] - ETA: 1s
+ 25157632/102967424 [======>.......................] - ETA: 1s
+ 33546240/102967424 [========>.....................] - ETA: 1s
+ 40189952/102967424 [==========>...................] - ETA: 1s
+ 41934848/102967424 [===========>..................] - ETA: 1s
+ 50143232/102967424 [=============>................] - ETA: 1s
+ 50323456/102967424 [=============>................] - ETA: 1s
+ 56967168/102967424 [===============>..............] - ETA: 1s
+ 58712064/102967424 [================>.............] - ETA: 1s
+ 65355776/102967424 [==================>...........] - ETA: 0s
+ 67100672/102967424 [==================>...........] - ETA: 0s
+ 69296128/102967424 [===================>..........] - ETA: 0s
+ 71540736/102967424 [===================>..........] - ETA: 0s
+ 73269248/102967424 [====================>.........] - ETA: 0s
+ 75489280/102967424 [====================>.........] - ETA: 0s
+ 83877888/102967424 [=======================>......] - ETA: 0s
+ 90521600/102967424 [=========================>....] - ETA: 0s
+ 92266496/102967424 [=========================>....] - ETA: 0s
+ 99598336/102967424 [============================>.] - ETA: 0s
+100646912/102967424 [============================>.] - ETA: 0s
+102850560/102967424 [============================>.] - ETA: 0s
+102967424/102967424 [==============================] - 3s 0us/step
+```
+
+# 加载模型
+将模型从任何框架转换为 tvm relay 模块。tvmc.load 支持来自任何框架的模型(例如 tensorflow saved_model、onnx、tflite 等),并自动检测文件类型。
+```python
+tvmc_model = tvmc.load(model_file_name)
+
+print(tvmc_model.mod)
+
+
+# tvmc_model 包含 tvmc_model.mod,即 relay 模块和 tvmc_model.params,即模块的参数。
+```
+
+Out:
+
+```python
+def @main(%input_2: Tensor[(1, 224, 224, 3), float32], %v_param_1: Tensor[(7, 7, 3, 64), float32], %v_param_2: Tensor[(64), float32], %v_param_3: Tensor[(64), float32], %v_param_4: Tensor[(64), float32], %v_param_5: Tensor[(64), float32], %v_param_6: Tensor[(64), float32], %v_param_19: Tensor[(1, 1, 64, 256), float32], %v_param_20: Tensor[(256), float32], %v_param_23: Tensor[(256), float32], %v_param_24: Tensor[(256), float32], %v_param_25: Tensor[(256), float32], %v_param_26: Tensor[(256), float32], %v_param_7: Tensor[(1, 1, 64, 64), float32], %v_param_8: Tensor[(64), float32], %v_param_9: Tensor[(64), float32], %v_param_10: Tensor[(64), float32], %v_param_11: Tensor[(64), float32], %v_param_12: Tensor[(64), float32], %v_param_13: Tensor[(3, 3, 64, 64), float32], %v_param_14: Tensor[(64), float32], %v_param_15: Tensor[(64), float32], %v_param_16: Tensor[(64), float32], %v_param_17: Tensor[(64), float32], %v_param_18: Tensor[(64), float32], %v_param_21: Tensor[(1, 1, 64, 256), float32], %v_param_22: Tensor[(256), float32], %v_param_27: Tensor[(256), float32], %v_param_28: Tensor[(256), float32], %v_param_29: Tensor[(256), float32], %v_param_30: Tensor[(256), float32], %v_param_31: Tensor[(1, 1, 256, 64), float32], %v_param_32: Tensor[(64), float32], %v_param_33: Tensor[(64), float32], %v_param_34: Tensor[(64), float32], %v_param_35: Tensor[(64), float32], %v_param_36: Tensor[(64), float32], %v_param_37: Tensor[(3, 3, 64, 64), float32], %v_param_38: Tensor[(64), float32], %v_param_39: Tensor[(64), float32], %v_param_40: Tensor[(64), float32], %v_param_41: Tensor[(64), float32], %v_param_42: Tensor[(64), float32], %v_param_43: Tensor[(1, 1, 64, 256), float32], %v_param_44: Tensor[(256), float32], %v_param_45: Tensor[(256), float32], %v_param_46: Tensor[(256), float32], %v_param_47: Tensor[(256), float32], %v_param_48: Tensor[(256), float32], %v_param_49: Tensor[(1, 1, 256, 64), float32], %v_param_50: Tensor[(64), float32], %v_param_51: Tensor[(64), float32], 
%v_param_52: Tensor[(64), float32], %v_param_53: Tensor[(64), float32], %v_param_54: Tensor[(64), float32], %v_param_55: Tensor[(3, 3, 64, 64), float32], %v_param_56: Tensor[(64), float32], %v_param_57: Tensor[(64), float32], %v_param_58: Tensor[(64), float32], %v_param_59: Tensor[(64), float32], %v_param_60: Tensor[(64), float32], %v_param_61: Tensor[(1, 1, 64, 256), float32], %v_param_62: Tensor[(256), float32], %v_param_63: Tensor[(256), float32], %v_param_64: Tensor[(256), float32], %v_param_65: Tensor[(256), float32], %v_param_66: Tensor[(256), float32], %v_param_79: Tensor[(1, 1, 256, 512), float32], %v_param_80: Tensor[(512), float32], %v_param_83: Tensor[(512), float32], %v_param_84: Tensor[(512), float32], %v_param_85: Tensor[(512), float32], %v_param_86: Tensor[(512), float32], %v_param_67: Tensor[(1, 1, 256, 128), float32], %v_param_68: Tensor[(128), float32], %v_param_69: Tensor[(128), float32], %v_param_70: Tensor[(128), float32], %v_param_71: Tensor[(128), float32], %v_param_72: Tensor[(128), float32], %v_param_73: Tensor[(3, 3, 128, 128), float32], %v_param_74: Tensor[(128), float32], %v_param_75: Tensor[(128), float32], %v_param_76: Tensor[(128), float32], %v_param_77: Tensor[(128), float32], %v_param_78: Tensor[(128), float32], %v_param_81: Tensor[(1, 1, 128, 512), float32], %v_param_82: Tensor[(512), float32], %v_param_87: Tensor[(512), float32], %v_param_88: Tensor[(512), float32], %v_param_89: Tensor[(512), float32], %v_param_90: Tensor[(512), float32], %v_param_91: Tensor[(1, 1, 512, 128), float32], %v_param_92: Tensor[(128), float32], %v_param_93: Tensor[(128), float32], %v_param_94: Tensor[(128), float32], %v_param_95: Tensor[(128), float32], %v_param_96: Tensor[(128), float32], %v_param_97: Tensor[(3, 3, 128, 128), float32], %v_param_98: Tensor[(128), float32], %v_param_99: Tensor[(128), float32], %v_param_100: Tensor[(128), float32], %v_param_101: Tensor[(128), float32], %v_param_102: Tensor[(128), float32], %v_param_103: Tensor[(1, 1, 128, 
512), float32], %v_param_104: Tensor[(512), float32], %v_param_105: Tensor[(512), float32], %v_param_106: Tensor[(512), float32], %v_param_107: Tensor[(512), float32], %v_param_108: Tensor[(512), float32], %v_param_109: Tensor[(1, 1, 512, 128), float32], %v_param_110: Tensor[(128), float32], %v_param_111: Tensor[(128), float32], %v_param_112: Tensor[(128), float32], %v_param_113: Tensor[(128), float32], %v_param_114: Tensor[(128), float32], %v_param_115: Tensor[(3, 3, 128, 128), float32], %v_param_116: Tensor[(128), float32], %v_param_117: Tensor[(128), float32], %v_param_118: Tensor[(128), float32], %v_param_119: Tensor[(128), float32], %v_param_120: Tensor[(128), float32], %v_param_121: Tensor[(1, 1, 128, 512), float32], %v_param_122: Tensor[(512), float32], %v_param_123: Tensor[(512), float32], %v_param_124: Tensor[(512), float32], %v_param_125: Tensor[(512), float32], %v_param_126: Tensor[(512), float32], %v_param_127: Tensor[(1, 1, 512, 128), float32], %v_param_128: Tensor[(128), float32], %v_param_129: Tensor[(128), float32], %v_param_130: Tensor[(128), float32], %v_param_131: Tensor[(128), float32], %v_param_132: Tensor[(128), float32], %v_param_133: Tensor[(3, 3, 128, 128), float32], %v_param_134: Tensor[(128), float32], %v_param_135: Tensor[(128), float32], %v_param_136: Tensor[(128), float32], %v_param_137: Tensor[(128), float32], %v_param_138: Tensor[(128), float32], %v_param_139: Tensor[(1, 1, 128, 512), float32], %v_param_140: Tensor[(512), float32], %v_param_141: Tensor[(512), float32], %v_param_142: Tensor[(512), float32], %v_param_143: Tensor[(512), float32], %v_param_144: Tensor[(512), float32], %v_param_157: Tensor[(1, 1, 512, 1024), float32], %v_param_158: Tensor[(1024), float32], %v_param_161: Tensor[(1024), float32], %v_param_162: Tensor[(1024), float32], %v_param_163: Tensor[(1024), float32], %v_param_164: Tensor[(1024), float32], %v_param_145: Tensor[(1, 1, 512, 256), float32], %v_param_146: Tensor[(256), float32], %v_param_147: Tensor[(256), 
float32], %v_param_148: Tensor[(256), float32], %v_param_149: Tensor[(256), float32], %v_param_150: Tensor[(256), float32], %v_param_151: Tensor[(3, 3, 256, 256), float32], %v_param_152: Tensor[(256), float32], %v_param_153: Tensor[(256), float32], %v_param_154: Tensor[(256), float32], %v_param_155: Tensor[(256), float32], %v_param_156: Tensor[(256), float32], %v_param_159: Tensor[(1, 1, 256, 1024), float32], %v_param_160: Tensor[(1024), float32], %v_param_165: Tensor[(1024), float32], %v_param_166: Tensor[(1024), float32], %v_param_167: Tensor[(1024), float32], %v_param_168: Tensor[(1024), float32], %v_param_169: Tensor[(1, 1, 1024, 256), float32], %v_param_170: Tensor[(256), float32], %v_param_171: Tensor[(256), float32], %v_param_172: Tensor[(256), float32], %v_param_173: Tensor[(256), float32], %v_param_174: Tensor[(256), float32], %v_param_175: Tensor[(3, 3, 256, 256), float32], %v_param_176: Tensor[(256), float32], %v_param_177: Tensor[(256), float32], %v_param_178: Tensor[(256), float32], %v_param_179: Tensor[(256), float32], %v_param_180: Tensor[(256), float32], %v_param_181: Tensor[(1, 1, 256, 1024), float32], %v_param_182: Tensor[(1024), float32], %v_param_183: Tensor[(1024), float32], %v_param_184: Tensor[(1024), float32], %v_param_185: Tensor[(1024), float32], %v_param_186: Tensor[(1024), float32], %v_param_187: Tensor[(1, 1, 1024, 256), float32], %v_param_188: Tensor[(256), float32], %v_param_189: Tensor[(256), float32], %v_param_190: Tensor[(256), float32], %v_param_191: Tensor[(256), float32], %v_param_192: Tensor[(256), float32], %v_param_193: Tensor[(3, 3, 256, 256), float32], %v_param_194: Tensor[(256), float32], %v_param_195: Tensor[(256), float32], %v_param_196: Tensor[(256), float32], %v_param_197: Tensor[(256), float32], %v_param_198: Tensor[(256), float32], %v_param_199: Tensor[(1, 1, 256, 1024), float32], %v_param_200: Tensor[(1024), float32], %v_param_201: Tensor[(1024), float32], %v_param_202: Tensor[(1024), float32], %v_param_203: 
Tensor[(1024), float32], %v_param_204: Tensor[(1024), float32], %v_param_205: Tensor[(1, 1, 1024, 256), float32], %v_param_206: Tensor[(256), float32], %v_param_207: Tensor[(256), float32], %v_param_208: Tensor[(256), float32], %v_param_209: Tensor[(256), float32], %v_param_210: Tensor[(256), float32], %v_param_211: Tensor[(3, 3, 256, 256), float32], %v_param_212: Tensor[(256), float32], %v_param_213: Tensor[(256), float32], %v_param_214: Tensor[(256), float32], %v_param_215: Tensor[(256), float32], %v_param_216: Tensor[(256), float32], %v_param_217: Tensor[(1, 1, 256, 1024), float32], %v_param_218: Tensor[(1024), float32], %v_param_219: Tensor[(1024), float32], %v_param_220: Tensor[(1024), float32], %v_param_221: Tensor[(1024), float32], %v_param_222: Tensor[(1024), float32], %v_param_223: Tensor[(1, 1, 1024, 256), float32], %v_param_224: Tensor[(256), float32], %v_param_225: Tensor[(256), float32], %v_param_226: Tensor[(256), float32], %v_param_227: Tensor[(256), float32], %v_param_228: Tensor[(256), float32], %v_param_229: Tensor[(3, 3, 256, 256), float32], %v_param_230: Tensor[(256), float32], %v_param_231: Tensor[(256), float32], %v_param_232: Tensor[(256), float32], %v_param_233: Tensor[(256), float32], %v_param_234: Tensor[(256), float32], %v_param_235: Tensor[(1, 1, 256, 1024), float32], %v_param_236: Tensor[(1024), float32], %v_param_237: Tensor[(1024), float32], %v_param_238: Tensor[(1024), float32], %v_param_239: Tensor[(1024), float32], %v_param_240: Tensor[(1024), float32], %v_param_241: Tensor[(1, 1, 1024, 256), float32], %v_param_242: Tensor[(256), float32], %v_param_243: Tensor[(256), float32], %v_param_244: Tensor[(256), float32], %v_param_245: Tensor[(256), float32], %v_param_246: Tensor[(256), float32], %v_param_247: Tensor[(3, 3, 256, 256), float32], %v_param_248: Tensor[(256), float32], %v_param_249: Tensor[(256), float32], %v_param_250: Tensor[(256), float32], %v_param_251: Tensor[(256), float32], %v_param_252: Tensor[(256), float32], 
%v_param_253: Tensor[(1, 1, 256, 1024), float32], %v_param_254: Tensor[(1024), float32], %v_param_255: Tensor[(1024), float32], %v_param_256: Tensor[(1024), float32], %v_param_257: Tensor[(1024), float32], %v_param_258: Tensor[(1024), float32], %v_param_271: Tensor[(1, 1, 1024, 2048), float32], %v_param_272: Tensor[(2048), float32], %v_param_275: Tensor[(2048), float32], %v_param_276: Tensor[(2048), float32], %v_param_277: Tensor[(2048), float32], %v_param_278: Tensor[(2048), float32], %v_param_259: Tensor[(1, 1, 1024, 512), float32], %v_param_260: Tensor[(512), float32], %v_param_261: Tensor[(512), float32], %v_param_262: Tensor[(512), float32], %v_param_263: Tensor[(512), float32], %v_param_264: Tensor[(512), float32], %v_param_265: Tensor[(3, 3, 512, 512), float32], %v_param_266: Tensor[(512), float32], %v_param_267: Tensor[(512), float32], %v_param_268: Tensor[(512), float32], %v_param_269: Tensor[(512), float32], %v_param_270: Tensor[(512), float32], %v_param_273: Tensor[(1, 1, 512, 2048), float32], %v_param_274: Tensor[(2048), float32], %v_param_279: Tensor[(2048), float32], %v_param_280: Tensor[(2048), float32], %v_param_281: Tensor[(2048), float32], %v_param_282: Tensor[(2048), float32], %v_param_283: Tensor[(1, 1, 2048, 512), float32], %v_param_284: Tensor[(512), float32], %v_param_285: Tensor[(512), float32], %v_param_286: Tensor[(512), float32], %v_param_287: Tensor[(512), float32], %v_param_288: Tensor[(512), float32], %v_param_289: Tensor[(3, 3, 512, 512), float32], %v_param_290: Tensor[(512), float32], %v_param_291: Tensor[(512), float32], %v_param_292: Tensor[(512), float32], %v_param_293: Tensor[(512), float32], %v_param_294: Tensor[(512), float32], %v_param_295: Tensor[(1, 1, 512, 2048), float32], %v_param_296: Tensor[(2048), float32], %v_param_297: Tensor[(2048), float32], %v_param_298: Tensor[(2048), float32], %v_param_299: Tensor[(2048), float32], %v_param_300: Tensor[(2048), float32], %v_param_301: Tensor[(1, 1, 2048, 512), float32], 
%v_param_302: Tensor[(512), float32], %v_param_303: Tensor[(512), float32], %v_param_304: Tensor[(512), float32], %v_param_305: Tensor[(512), float32], %v_param_306: Tensor[(512), float32], %v_param_307: Tensor[(3, 3, 512, 512), float32], %v_param_308: Tensor[(512), float32], %v_param_309: Tensor[(512), float32], %v_param_310: Tensor[(512), float32], %v_param_311: Tensor[(512), float32], %v_param_312: Tensor[(512), float32], %v_param_313: Tensor[(1, 1, 512, 2048), float32], %v_param_314: Tensor[(2048), float32], %v_param_315: Tensor[(2048), float32], %v_param_316: Tensor[(2048), float32], %v_param_317: Tensor[(2048), float32], %v_param_318: Tensor[(2048), float32], %v_param_319: Tensor[(1000, 2048), float32], %v_param_320: Tensor[(1000), float32]) {
+ %0 = nn.pad(%input_2, 0, pad_width=[[0, 0], [3, 3], [3, 3], [0, 0]]);
+ %1 = nn.conv2d(%0, %v_param_1, strides=[2, 2], padding=[0, 0, 0, 0], channels=64, kernel_size=[7, 7], data_layout="NHWC", kernel_layout="HWIO");
+ %2 = nn.bias_add(%1, %v_param_2, axis=-1);
+ %3 = nn.batch_norm(%2, %v_param_3, %v_param_4, %v_param_5, %v_param_6, axis=3, epsilon=1.001e-05f);
+ %4 = %3.0;
+ %5 = nn.relu(%4);
+ %6 = nn.pad(%5, 0, pad_width=[[0, 0], [1, 1], [1, 1], [0, 0]]);
+ %7 = nn.max_pool2d(%6, pool_size=[3, 3], strides=[2, 2], padding=[0, 0, 0, 0], layout="NHWC");
+ %8 = nn.conv2d(%7, %v_param_19, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %9 = nn.bias_add(%8, %v_param_20, axis=-1);
+ %10 = nn.batch_norm(%9, %v_param_23, %v_param_24, %v_param_25, %v_param_26, axis=3, epsilon=1.001e-05f);
+ %11 = nn.conv2d(%7, %v_param_7, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %12 = nn.bias_add(%11, %v_param_8, axis=-1);
+ %13 = nn.batch_norm(%12, %v_param_9, %v_param_10, %v_param_11, %v_param_12, axis=3, epsilon=1.001e-05f);
+ %14 = %13.0;
+ %15 = nn.relu(%14);
+ %16 = nn.conv2d(%15, %v_param_13, padding=[1i64, 1i64, 1i64, 1i64], channels=64, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %17 = nn.bias_add(%16, %v_param_14, axis=-1);
+ %18 = nn.batch_norm(%17, %v_param_15, %v_param_16, %v_param_17, %v_param_18, axis=3, epsilon=1.001e-05f);
+ %19 = %18.0;
+ %20 = nn.relu(%19);
+ %21 = nn.conv2d(%20, %v_param_21, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %22 = nn.bias_add(%21, %v_param_22, axis=-1);
+ %23 = nn.batch_norm(%22, %v_param_27, %v_param_28, %v_param_29, %v_param_30, axis=3, epsilon=1.001e-05f);
+ %24 = %10.0;
+ %25 = %23.0;
+ %26 = add(%24, %25);
+ %27 = nn.relu(%26);
+ %28 = nn.conv2d(%27, %v_param_31, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %29 = nn.bias_add(%28, %v_param_32, axis=-1);
+ %30 = nn.batch_norm(%29, %v_param_33, %v_param_34, %v_param_35, %v_param_36, axis=3, epsilon=1.001e-05f);
+ %31 = %30.0;
+ %32 = nn.relu(%31);
+ %33 = nn.conv2d(%32, %v_param_37, padding=[1i64, 1i64, 1i64, 1i64], channels=64, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %34 = nn.bias_add(%33, %v_param_38, axis=-1);
+ %35 = nn.batch_norm(%34, %v_param_39, %v_param_40, %v_param_41, %v_param_42, axis=3, epsilon=1.001e-05f);
+ %36 = %35.0;
+ %37 = nn.relu(%36);
+ %38 = nn.conv2d(%37, %v_param_43, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %39 = nn.bias_add(%38, %v_param_44, axis=-1);
+ %40 = nn.batch_norm(%39, %v_param_45, %v_param_46, %v_param_47, %v_param_48, axis=3, epsilon=1.001e-05f);
+ %41 = %40.0;
+ %42 = add(%27, %41);
+ %43 = nn.relu(%42);
+ %44 = nn.conv2d(%43, %v_param_49, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %45 = nn.bias_add(%44, %v_param_50, axis=-1);
+ %46 = nn.batch_norm(%45, %v_param_51, %v_param_52, %v_param_53, %v_param_54, axis=3, epsilon=1.001e-05f);
+ %47 = %46.0;
+ %48 = nn.relu(%47);
+ %49 = nn.conv2d(%48, %v_param_55, padding=[1i64, 1i64, 1i64, 1i64], channels=64, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %50 = nn.bias_add(%49, %v_param_56, axis=-1);
+ %51 = nn.batch_norm(%50, %v_param_57, %v_param_58, %v_param_59, %v_param_60, axis=3, epsilon=1.001e-05f);
+ %52 = %51.0;
+ %53 = nn.relu(%52);
+ %54 = nn.conv2d(%53, %v_param_61, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %55 = nn.bias_add(%54, %v_param_62, axis=-1);
+ %56 = nn.batch_norm(%55, %v_param_63, %v_param_64, %v_param_65, %v_param_66, axis=3, epsilon=1.001e-05f);
+ %57 = %56.0;
+ %58 = add(%43, %57);
+ %59 = nn.relu(%58);
+ %60 = nn.conv2d(%59, %v_param_79, strides=[2, 2], padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %61 = nn.bias_add(%60, %v_param_80, axis=-1);
+ %62 = nn.batch_norm(%61, %v_param_83, %v_param_84, %v_param_85, %v_param_86, axis=3, epsilon=1.001e-05f);
+ %63 = nn.conv2d(%59, %v_param_67, strides=[2, 2], padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %64 = nn.bias_add(%63, %v_param_68, axis=-1);
+ %65 = nn.batch_norm(%64, %v_param_69, %v_param_70, %v_param_71, %v_param_72, axis=3, epsilon=1.001e-05f);
+ %66 = %65.0;
+ %67 = nn.relu(%66);
+ %68 = nn.conv2d(%67, %v_param_73, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %69 = nn.bias_add(%68, %v_param_74, axis=-1);
+ %70 = nn.batch_norm(%69, %v_param_75, %v_param_76, %v_param_77, %v_param_78, axis=3, epsilon=1.001e-05f);
+ %71 = %70.0;
+ %72 = nn.relu(%71);
+ %73 = nn.conv2d(%72, %v_param_81, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %74 = nn.bias_add(%73, %v_param_82, axis=-1);
+ %75 = nn.batch_norm(%74, %v_param_87, %v_param_88, %v_param_89, %v_param_90, axis=3, epsilon=1.001e-05f);
+ %76 = %62.0;
+ %77 = %75.0;
+ %78 = add(%76, %77);
+ %79 = nn.relu(%78);
+ %80 = nn.conv2d(%79, %v_param_91, padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %81 = nn.bias_add(%80, %v_param_92, axis=-1);
+ %82 = nn.batch_norm(%81, %v_param_93, %v_param_94, %v_param_95, %v_param_96, axis=3, epsilon=1.001e-05f);
+ %83 = %82.0;
+ %84 = nn.relu(%83);
+ %85 = nn.conv2d(%84, %v_param_97, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %86 = nn.bias_add(%85, %v_param_98, axis=-1);
+ %87 = nn.batch_norm(%86, %v_param_99, %v_param_100, %v_param_101, %v_param_102, axis=3, epsilon=1.001e-05f);
+ %88 = %87.0;
+ %89 = nn.relu(%88);
+ %90 = nn.conv2d(%89, %v_param_103, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %91 = nn.bias_add(%90, %v_param_104, axis=-1);
+ %92 = nn.batch_norm(%91, %v_param_105, %v_param_106, %v_param_107, %v_param_108, axis=3, epsilon=1.001e-05f);
+ %93 = %92.0;
+ %94 = add(%79, %93);
+ %95 = nn.relu(%94);
+ %96 = nn.conv2d(%95, %v_param_109, padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %97 = nn.bias_add(%96, %v_param_110, axis=-1);
+ %98 = nn.batch_norm(%97, %v_param_111, %v_param_112, %v_param_113, %v_param_114, axis=3, epsilon=1.001e-05f);
+ %99 = %98.0;
+ %100 = nn.relu(%99);
+ %101 = nn.conv2d(%100, %v_param_115, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %102 = nn.bias_add(%101, %v_param_116, axis=-1);
+ %103 = nn.batch_norm(%102, %v_param_117, %v_param_118, %v_param_119, %v_param_120, axis=3, epsilon=1.001e-05f);
+ %104 = %103.0;
+ %105 = nn.relu(%104);
+ %106 = nn.conv2d(%105, %v_param_121, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %107 = nn.bias_add(%106, %v_param_122, axis=-1);
+ %108 = nn.batch_norm(%107, %v_param_123, %v_param_124, %v_param_125, %v_param_126, axis=3, epsilon=1.001e-05f);
+ %109 = %108.0;
+ %110 = add(%95, %109);
+ %111 = nn.relu(%110);
+ %112 = nn.conv2d(%111, %v_param_127, padding=[0, 0, 0, 0], channels=128, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %113 = nn.bias_add(%112, %v_param_128, axis=-1);
+ %114 = nn.batch_norm(%113, %v_param_129, %v_param_130, %v_param_131, %v_param_132, axis=3, epsilon=1.001e-05f);
+ %115 = %114.0;
+ %116 = nn.relu(%115);
+ %117 = nn.conv2d(%116, %v_param_133, padding=[1i64, 1i64, 1i64, 1i64], channels=128, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %118 = nn.bias_add(%117, %v_param_134, axis=-1);
+ %119 = nn.batch_norm(%118, %v_param_135, %v_param_136, %v_param_137, %v_param_138, axis=3, epsilon=1.001e-05f);
+ %120 = %119.0;
+ %121 = nn.relu(%120);
+ %122 = nn.conv2d(%121, %v_param_139, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %123 = nn.bias_add(%122, %v_param_140, axis=-1);
+ %124 = nn.batch_norm(%123, %v_param_141, %v_param_142, %v_param_143, %v_param_144, axis=3, epsilon=1.001e-05f);
+ %125 = %124.0;
+ %126 = add(%111, %125);
+ %127 = nn.relu(%126);
+ %128 = nn.conv2d(%127, %v_param_157, strides=[2, 2], padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %129 = nn.bias_add(%128, %v_param_158, axis=-1);
+ %130 = nn.batch_norm(%129, %v_param_161, %v_param_162, %v_param_163, %v_param_164, axis=3, epsilon=1.001e-05f);
+ %131 = nn.conv2d(%127, %v_param_145, strides=[2, 2], padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %132 = nn.bias_add(%131, %v_param_146, axis=-1);
+ %133 = nn.batch_norm(%132, %v_param_147, %v_param_148, %v_param_149, %v_param_150, axis=3, epsilon=1.001e-05f);
+ %134 = %133.0;
+ %135 = nn.relu(%134);
+ %136 = nn.conv2d(%135, %v_param_151, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %137 = nn.bias_add(%136, %v_param_152, axis=-1);
+ %138 = nn.batch_norm(%137, %v_param_153, %v_param_154, %v_param_155, %v_param_156, axis=3, epsilon=1.001e-05f);
+ %139 = %138.0;
+ %140 = nn.relu(%139);
+ %141 = nn.conv2d(%140, %v_param_159, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %142 = nn.bias_add(%141, %v_param_160, axis=-1);
+ %143 = nn.batch_norm(%142, %v_param_165, %v_param_166, %v_param_167, %v_param_168, axis=3, epsilon=1.001e-05f);
+ %144 = %130.0;
+ %145 = %143.0;
+ %146 = add(%144, %145);
+ %147 = nn.relu(%146);
+ %148 = nn.conv2d(%147, %v_param_169, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %149 = nn.bias_add(%148, %v_param_170, axis=-1);
+ %150 = nn.batch_norm(%149, %v_param_171, %v_param_172, %v_param_173, %v_param_174, axis=3, epsilon=1.001e-05f);
+ %151 = %150.0;
+ %152 = nn.relu(%151);
+ %153 = nn.conv2d(%152, %v_param_175, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %154 = nn.bias_add(%153, %v_param_176, axis=-1);
+ %155 = nn.batch_norm(%154, %v_param_177, %v_param_178, %v_param_179, %v_param_180, axis=3, epsilon=1.001e-05f);
+ %156 = %155.0;
+ %157 = nn.relu(%156);
+ %158 = nn.conv2d(%157, %v_param_181, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %159 = nn.bias_add(%158, %v_param_182, axis=-1);
+ %160 = nn.batch_norm(%159, %v_param_183, %v_param_184, %v_param_185, %v_param_186, axis=3, epsilon=1.001e-05f);
+ %161 = %160.0;
+ %162 = add(%147, %161);
+ %163 = nn.relu(%162);
+ %164 = nn.conv2d(%163, %v_param_187, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %165 = nn.bias_add(%164, %v_param_188, axis=-1);
+ %166 = nn.batch_norm(%165, %v_param_189, %v_param_190, %v_param_191, %v_param_192, axis=3, epsilon=1.001e-05f);
+ %167 = %166.0;
+ %168 = nn.relu(%167);
+ %169 = nn.conv2d(%168, %v_param_193, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %170 = nn.bias_add(%169, %v_param_194, axis=-1);
+ %171 = nn.batch_norm(%170, %v_param_195, %v_param_196, %v_param_197, %v_param_198, axis=3, epsilon=1.001e-05f);
+ %172 = %171.0;
+ %173 = nn.relu(%172);
+ %174 = nn.conv2d(%173, %v_param_199, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %175 = nn.bias_add(%174, %v_param_200, axis=-1);
+ %176 = nn.batch_norm(%175, %v_param_201, %v_param_202, %v_param_203, %v_param_204, axis=3, epsilon=1.001e-05f);
+ %177 = %176.0;
+ %178 = add(%163, %177);
+ %179 = nn.relu(%178);
+ %180 = nn.conv2d(%179, %v_param_205, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %181 = nn.bias_add(%180, %v_param_206, axis=-1);
+ %182 = nn.batch_norm(%181, %v_param_207, %v_param_208, %v_param_209, %v_param_210, axis=3, epsilon=1.001e-05f);
+ %183 = %182.0;
+ %184 = nn.relu(%183);
+ %185 = nn.conv2d(%184, %v_param_211, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %186 = nn.bias_add(%185, %v_param_212, axis=-1);
+ %187 = nn.batch_norm(%186, %v_param_213, %v_param_214, %v_param_215, %v_param_216, axis=3, epsilon=1.001e-05f);
+ %188 = %187.0;
+ %189 = nn.relu(%188);
+ %190 = nn.conv2d(%189, %v_param_217, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %191 = nn.bias_add(%190, %v_param_218, axis=-1);
+ %192 = nn.batch_norm(%191, %v_param_219, %v_param_220, %v_param_221, %v_param_222, axis=3, epsilon=1.001e-05f);
+ %193 = %192.0;
+ %194 = add(%179, %193);
+ %195 = nn.relu(%194);
+ %196 = nn.conv2d(%195, %v_param_223, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %197 = nn.bias_add(%196, %v_param_224, axis=-1);
+ %198 = nn.batch_norm(%197, %v_param_225, %v_param_226, %v_param_227, %v_param_228, axis=3, epsilon=1.001e-05f);
+ %199 = %198.0;
+ %200 = nn.relu(%199);
+ %201 = nn.conv2d(%200, %v_param_229, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %202 = nn.bias_add(%201, %v_param_230, axis=-1);
+ %203 = nn.batch_norm(%202, %v_param_231, %v_param_232, %v_param_233, %v_param_234, axis=3, epsilon=1.001e-05f);
+ %204 = %203.0;
+ %205 = nn.relu(%204);
+ %206 = nn.conv2d(%205, %v_param_235, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %207 = nn.bias_add(%206, %v_param_236, axis=-1);
+ %208 = nn.batch_norm(%207, %v_param_237, %v_param_238, %v_param_239, %v_param_240, axis=3, epsilon=1.001e-05f);
+ %209 = %208.0;
+ %210 = add(%195, %209);
+ %211 = nn.relu(%210);
+ %212 = nn.conv2d(%211, %v_param_241, padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %213 = nn.bias_add(%212, %v_param_242, axis=-1);
+ %214 = nn.batch_norm(%213, %v_param_243, %v_param_244, %v_param_245, %v_param_246, axis=3, epsilon=1.001e-05f);
+ %215 = %214.0;
+ %216 = nn.relu(%215);
+ %217 = nn.conv2d(%216, %v_param_247, padding=[1i64, 1i64, 1i64, 1i64], channels=256, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %218 = nn.bias_add(%217, %v_param_248, axis=-1);
+ %219 = nn.batch_norm(%218, %v_param_249, %v_param_250, %v_param_251, %v_param_252, axis=3, epsilon=1.001e-05f);
+ %220 = %219.0;
+ %221 = nn.relu(%220);
+ %222 = nn.conv2d(%221, %v_param_253, padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %223 = nn.bias_add(%222, %v_param_254, axis=-1);
+ %224 = nn.batch_norm(%223, %v_param_255, %v_param_256, %v_param_257, %v_param_258, axis=3, epsilon=1.001e-05f);
+ %225 = %224.0;
+ %226 = add(%211, %225);
+ %227 = nn.relu(%226);
+ %228 = nn.conv2d(%227, %v_param_271, strides=[2, 2], padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %229 = nn.bias_add(%228, %v_param_272, axis=-1);
+ %230 = nn.batch_norm(%229, %v_param_275, %v_param_276, %v_param_277, %v_param_278, axis=3, epsilon=1.001e-05f);
+ %231 = nn.conv2d(%227, %v_param_259, strides=[2, 2], padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %232 = nn.bias_add(%231, %v_param_260, axis=-1);
+ %233 = nn.batch_norm(%232, %v_param_261, %v_param_262, %v_param_263, %v_param_264, axis=3, epsilon=1.001e-05f);
+ %234 = %233.0;
+ %235 = nn.relu(%234);
+ %236 = nn.conv2d(%235, %v_param_265, padding=[1i64, 1i64, 1i64, 1i64], channels=512, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %237 = nn.bias_add(%236, %v_param_266, axis=-1);
+ %238 = nn.batch_norm(%237, %v_param_267, %v_param_268, %v_param_269, %v_param_270, axis=3, epsilon=1.001e-05f);
+ %239 = %238.0;
+ %240 = nn.relu(%239);
+ %241 = nn.conv2d(%240, %v_param_273, padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %242 = nn.bias_add(%241, %v_param_274, axis=-1);
+ %243 = nn.batch_norm(%242, %v_param_279, %v_param_280, %v_param_281, %v_param_282, axis=3, epsilon=1.001e-05f);
+ %244 = %230.0;
+ %245 = %243.0;
+ %246 = add(%244, %245);
+ %247 = nn.relu(%246);
+ %248 = nn.conv2d(%247, %v_param_283, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %249 = nn.bias_add(%248, %v_param_284, axis=-1);
+ %250 = nn.batch_norm(%249, %v_param_285, %v_param_286, %v_param_287, %v_param_288, axis=3, epsilon=1.001e-05f);
+ %251 = %250.0;
+ %252 = nn.relu(%251);
+ %253 = nn.conv2d(%252, %v_param_289, padding=[1i64, 1i64, 1i64, 1i64], channels=512, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %254 = nn.bias_add(%253, %v_param_290, axis=-1);
+ %255 = nn.batch_norm(%254, %v_param_291, %v_param_292, %v_param_293, %v_param_294, axis=3, epsilon=1.001e-05f);
+ %256 = %255.0;
+ %257 = nn.relu(%256);
+ %258 = nn.conv2d(%257, %v_param_295, padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %259 = nn.bias_add(%258, %v_param_296, axis=-1);
+ %260 = nn.batch_norm(%259, %v_param_297, %v_param_298, %v_param_299, %v_param_300, axis=3, epsilon=1.001e-05f);
+ %261 = %260.0;
+ %262 = add(%247, %261);
+ %263 = nn.relu(%262);
+ %264 = nn.conv2d(%263, %v_param_301, padding=[0, 0, 0, 0], channels=512, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %265 = nn.bias_add(%264, %v_param_302, axis=-1);
+ %266 = nn.batch_norm(%265, %v_param_303, %v_param_304, %v_param_305, %v_param_306, axis=3, epsilon=1.001e-05f);
+ %267 = %266.0;
+ %268 = nn.relu(%267);
+ %269 = nn.conv2d(%268, %v_param_307, padding=[1i64, 1i64, 1i64, 1i64], channels=512, kernel_size=[3, 3], data_layout="NHWC", kernel_layout="HWIO");
+ %270 = nn.bias_add(%269, %v_param_308, axis=-1);
+ %271 = nn.batch_norm(%270, %v_param_309, %v_param_310, %v_param_311, %v_param_312, axis=3, epsilon=1.001e-05f);
+ %272 = %271.0;
+ %273 = nn.relu(%272);
+ %274 = nn.conv2d(%273, %v_param_313, padding=[0, 0, 0, 0], channels=2048, kernel_size=[1, 1], data_layout="NHWC", kernel_layout="HWIO");
+ %275 = nn.bias_add(%274, %v_param_314, axis=-1);
+ %276 = nn.batch_norm(%275, %v_param_315, %v_param_316, %v_param_317, %v_param_318, axis=3, epsilon=1.001e-05f);
+ %277 = %276.0;
+ %278 = add(%263, %277);
+ %279 = nn.relu(%278);
+ %280 = nn.global_avg_pool2d(%279, layout="NHWC");
+ %281 = nn.batch_flatten(%280);
+ %282 = nn.dense(%281, %v_param_319, units=1000);
+ %283 = nn.bias_add(%282, %v_param_320);
+ nn.softmax(%283)
+}
+```
+
+# 自动调优
+现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html)
+
+```python
+rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1")
+rpc_tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190))
+rpc_key = "android"
+rpc_tracker = rpc_tracker_host + ":" + str(rpc_tracker_port)
+
+# 自动调整是计算密集型和耗时的任务。
+# 它在上述配置中被设置为 False,因为此脚本在 x86 上运行以进行演示。
+# 请将 :code:`is_tuning` 设置为 True 以启用自动调整。
+
+# 此外,:code:`test_target` 设置为 :code:`llvm`,以使此示例与 x86 演示兼容。
+# 请在上述配置中将其更改为 :code:`opencl` 或 :code:`opencl -device=adreno` 以用于 RPC 目标。
+
+if is_tuning:
+ tvmc.tune(
+ tvmc_model,
+ target=target,
+ tuning_records=tune_log,
+ target_host=target_host,
+ hostname=rpc_tracker_host,
+ port=rpc_tracker_port,
+ rpc_key=rpc_key,
+ tuner="xgb",
+ repeat=30,
+ trials=3,
+ early_stopping=0,
+ )
+```
+
+# 编译
+编译以生成 tvm 产品
+
+```python
+# 此生成的示例在我们的 x86 服务器上运行以进行演示。
+# 要在真实目标上的 RPC 上部署和调优,请在上述配置部分将 :code:`local_demo` 设置为 False。
+
+# OpenCLML 卸载将尝试通过使用 OpenCLML 专有运算符库加速受支持的运算符。
+# 默认情况下,在上述配置部分 :code:`enable_clml` 设置为 False。
+
+if not enable_clml:
+ if local_demo:
+ tvmc_package = tvmc.compile(
+ tvmc_model,
+ target=target,
+ )
+ else:
+ tvmc_package = tvmc.compile(
+ tvmc_model,
+ target=target,
+ target_host=target_host,
+ cross=cross_compiler,
+ tuning_records=tune_log,
+ )
+else:
+ # 或者,我们可以保存编译输出并将其保存为 TVMCPackage。
+ # 这种方式避免了再次编译时加载编译的模块。
+ target = target + ", clml"
+ pkg_path = tmp_path.relpath("keras-resnet50.tar")
+ tvmc.compile(
+ tvmc_model,
+ target=target,
+ target_host=target_host,
+ cross=cross_compiler,
+ tuning_records=tune_log,
+ package_path=pkg_path,
+ )
+
+ # 加载已编译的包
+ tvmc_package = TVMCPackage(package_path=pkg_path)
+
+# tvmc_package 包括 tvmc_package.lib_path, tvmc_package.graph, tvmc_package.params
+# 已保存的 TVMPackage 实际上是 mod.so、mod.json 和 mod.params 的 tar 存档。
+```
+
+# 部署和运行
+通过让 tvmc 使用随机数据填充输入在 RPC 上部署和运行已编译的模型。
+
+```python
+# 在 RPC 设置上运行
+if local_demo:
+ result = tvmc.run(tvmc_package, device="cpu", fill_mode="random")
+else:
+ result = tvmc.run(
+ tvmc_package,
+ device="cl",
+ rpc_key=rpc_key,
+ hostname=rpc_tracker_host,
+ port=rpc_tracker_port,
+ fill_mode="random",
+ )
+
+# result 是输出的字典。
+print("Result:", result)
+```
+
+Out:
+```python
+Result: []
+Output Names:
+ ['output_0']
+```
\ No newline at end of file
From 2d3e6aed308a5281a0b88f0ac721796378c602bf Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Thu, 30 Nov 2023 23:11:32 +0800
Subject: [PATCH 02/15] alter url
---
docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
index af9de6f1..74b3a9cb 100644
--- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
+++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
@@ -20,7 +20,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型
此外,您应该已经为 Android 构建了 TVM 。请参阅以下说明,了解如何构建它并设置 RPC 环境。
-[在 Adreno GPU 上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html)
+[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
```python
import os
@@ -407,7 +407,7 @@ def @main(%input_2: Tensor[(1, 224, 224, 3), float32], %v_param_1: Tensor[(7, 7,
```
# 自动调优
-现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.apache.org/docs/how_to/deploy/adreno.html)
+现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
```python
rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1")
From d9a20dfe86f78ab9d6af1799ca28e7b628cf9325 Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Thu, 30 Nov 2023 23:42:18 +0800
Subject: [PATCH 03/15] translate depoly_adreno
---
.../deploy/deploy_models/10-depoly_adreno.md | 467 ++++++++++++++++++
1 file changed, 467 insertions(+)
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
index e69de29b..d4a97d43 100644
--- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
+++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
@@ -0,0 +1,467 @@
+---
+title: 在 Adreno™ 上部署预训练模型
+---
+
+# 在 Adreno™ 上部署预训练模型
+**作者**: Daniil Barinov, Siva Rama Krishna
+:::note
+单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_model_on_adreno.html#sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-py) 下载完整的示例代码
+:::
+
+本文是一个逐步教程,演示如何在 Adreno 上(不同精度)部署预训练的 PyTorch ResNet-18 模型。
+
+首先,我们需要安装 PyTorch 与 TorchVision ,因为我们将使用它作为我们的模型库。
+
+可以通过 pip 快速安装:
+
+```bash
+pip install torch
+pip install torchvision
+```
+
+除此之外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它。
+
+[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
+
+在构建部分之后,构建目录中应该有两个文件: “libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。
+
+## TVM RPC 服务器
+
+要获取设备的哈希值,请使用:
+
+```bash
+adb devices
+```
+
+如果您的计算机连接了多个设备,请设置要使用的 Android 设备:
+
+```bash
+export ANDROID_SERIAL=
+```
+
+然后,要将这两个文件上传到设备上,应该使用:
+
+```bash
+adb push {libtvm_runtime.so,tvm_rpc} /data/local/tmp
+```
+
+此时,您的设备上的路径 /data/local/tmp 将有 “libtvm_runtime.so” 和 “tvm_rpc” 。有时 cmake 找不到 “libc++_shared.so”。使用:
+
+```bash
+find ${ANDROID_NDK_HOME} -name libc++_shared.so
+```
+
+找到它,并使用 adb 将其推送到所需的设备:
+
+```bash
+adb push libc++_shared.so /data/local/tmp
+```
+
+我们现在准备运行 TVM RPC 服务器。在第一个控制台中使用以下行启动 rpc_tracker:
+
+```bash
+python3 -m tvm.exec.rpc_tracker --port 9190
+```
+
+然后,我们需要在第二个控制台中,在所需的设备上运行 tvm_rpc 服务器:
+
+```bash
+adb reverse tcp:9190 tcp:9190
+adb forward tcp:5000 tcp:5000
+adb forward tcp:5002 tcp:5001
+adb forward tcp:5003 tcp:5002
+adb forward tcp:5004 tcp:5003
+adb shell LD_LIBRARY_PATH=/data/local/tmp /data/local/tmp/tvm_rpc server --host=0.0.0.0 --port=5000 --tracker=127.0.0.1:9190 --key=android --port-end=5100
+```
+
+在编译和推断模型之前,请指定 TVM_TRACKER_HOST 和 TVM_TRACKER_PORT:
+
+```bash
+export TVM_TRACKER_HOST=0.0.0.0
+export TVM_TRACKER_PORT=9190
+```
+
+检查 tracker 是否正在运行,并且设备是否可用:
+
+```bash
+python -m tvm.exec.query_rpc_tracker --port 9190
+```
+
+例如,如果有 1 个 Android 设备,输出可能是:
+
+```info
+Queue Status
+----------------------------------
+key total free pending
+----------------------------------
+android 1 1 0
+----------------------------------
+```
+
+## 配置
+```python
+import os
+import torch
+import torchvision
+import tvm
+from tvm import te
+from tvm import relay, rpc
+from tvm.contrib import utils, ndk
+from tvm.contrib import graph_executor
+from tvm.relay.op.contrib import clml
+from tvm import autotvm
+
+# 下面是一组配置,用于控制脚本的行为,如本地运行或设备运行、目标定义、dtype 设置和自动调优启用。
+# 如有需要,请根据需要更改这些设置。
+
+# 与 float32 相比,Adreno 设备对 float16 的效率更高
+# 鉴于降低精度不会影响预期输出
+# 建议使用较低的精度。
+# 我们有一个辅助 API,使精度转换变得简单
+# 它支持 "float16" 和 "float16_acc32" 模式的 dtype。
+# 让我们选择 "float16" 进行计算和 "float32" 进行累积。
+
+calculation_dtype = "float16"
+acc_dtype = "float32"
+
+# 在编译以生成纹理之前指定 Adreno 目标
+# 利用内核并获得所有纹理的好处
+# 注意:此生成的示例在我们的 x86 服务器上运行以进行演示。
+# 如果在 Android 设备上运行它,我们需要
+# 指定其指令集。如果要在实际设备上运行此教程,请将 :code:`local_demo` 设置为 False。
+local_demo = True
+
+# 默认情况下,在 CPU 目标上执行。
+# 选择 'cpu'、'opencl' 和 'opencl -device=adreno'
+test_target = "cpu"
+
+# 更改目标配置。
+# 运行 `adb shell cat /proc/cpuinfo` 以查找架构。
+arch = "arm64"
+target = tvm.target.Target("llvm -mtriple=%s-linux-android" % arch)
+
+# 自动调整是计算密集型和耗时的任务,
+# 因此默认情况下禁用。如果需要,请启用它。
+is_tuning = False
+tune_log = "adreno-resnet18.log"
+
+# 启用 OpenCLML 加速运算符库。
+enable_clml = False
+
+```
+
+## 获取 PyTorch 模型
+从 torchvision models 获取 resnet18
+
+```python
+model_name = "resnet18"
+model = getattr(torchvision.models, model_name)(pretrained=True)
+model = model.eval()
+
+# 通过追踪抓取 TorchScripted 模型
+input_shape = [1, 3, 224, 224]
+input_data = torch.randn(input_shape)
+scripted_model = torch.jit.trace(model, input_data).eval()
+
+```
+
+Out:
+```info
+/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
+ warnings.warn(
+/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
+ warnings.warn(msg)
+
+```
+
+## 加载测试图片
+我们使用一张经典的来自 ImageNet 的猫图片作为示例
+
+```python
+from PIL import Image
+from tvm.contrib.download import download_testdata
+from matplotlib import pyplot as plt
+import numpy as np
+
+img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_path = download_testdata(img_url, "cat.png", module="data")
+img = Image.open(img_path).resize((224, 224))
+plt.imshow(img)
+plt.show()
+
+# 处理图片并转换为 tensor
+from torchvision import transforms
+
+my_preprocess = transforms.Compose(
+ [
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+)
+img = my_preprocess(img)
+img = np.expand_dims(img, 0)
+
+```
+
+![cat](https://tvm.apache.org/docs/_images/sphx_glr_from_keras_001.png)
+
+## 将 PyTorch 模型转换为 Relay 模块
+TVM 具有用于各种框架 的在 relay.frontend 中的前端 API 。现在对于 PyTorch 模型导入,我们有 relay.frontend.from_pytorch API 。输入名称可以是任意的
+
+```python
+input_name = "input0"
+shape_list = [(input_name, img.shape)]
+
+mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
+
+```
+
+Out:
+```info
+/workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
+ return LooseVersion(torch_ver) > ver
+/venv/apache-tvm-py3.8/lib/python3.8/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
+ other = LooseVersion(other)
+```
+
+## 精度
+```python
+# Adreno 设备在 float16 上的效率比 float32 高
+# 鉴于降低精度不会影响预期输出
+# 建议使用较低的精度。
+
+# TVM 通过 ToMixedPrecision 转换过程支持混合精度。
+# 我们可能需要注册精度规则,比如精度类型、累加
+# 数据类型等,以覆盖默认设置。
+# 下面的辅助 API 简化了模块间的精度转换。
+
+# 在上面的配置部分,计算 dtype 设置为 "float16",累积 dtype 设置为 "float32"。
+
+from tvm.driver.tvmc.transform import apply_graph_transforms
+
+mod = apply_graph_transforms(
+ mod,
+ {
+ "mixed_precision": True,
+ "mixed_precision_ops": ["nn.conv2d", "nn.dense"],
+ "mixed_precision_calculation_type": calculation_dtype,
+ "mixed_precision_acc_type": acc_dtype,
+ },
+)
+
+```
+
+正如您在 IR 中所看到的那样,该架构现在包含强制转换操作,这些操作是为了将精度转换为 FP16 。您还可以使用 "float16" 或 "float32" 作为其他 dtype 选项。
+
+## 准备 TVM 目标
+
+```python
+# 此生成的示例在我们的 x86 服务器上运行以进行演示。
+
+# 要在真实目标上部署并调试,请在上面的配置部分将 :code:`local_demo` 设置为 False。
+# 同样,:code:`test_target` 设置为 :code:`llvm`,以使其与 x86 演示兼容。
+# 请将其更改为 :code:`opencl` 或 :code:`opencl -device=adreno`,以用于上面配置中的 RPC 目标。
+
+
+if local_demo:
+ target = tvm.target.Target("llvm")
+elif test_target.find("opencl"):
+ target = tvm.target.Target(test_target, host=target)
+
+```
+
+## 自动调整
+下面的几个指令可以使用 XGBoost 作为调优算法对 Relay 模块进行自动调优。
+
+```python
+# 自动调优过程包括提取任务、定义调优配置和
+# 为每个任务调整最佳性能的内核配置。
+
+# 获取与 RPC 相关的设置。
+rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1")
+rpc_tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190))
+key = "android"
+
+# 自动调优是计算密集型和耗时的任务。
+# 在上面的配置中,由于此脚本在 x86 上运行进行演示,设置为 False。
+# 请将 :code:`is_tuning` 设置为 True 以启用自动调优。
+
+if is_tuning:
+ # 自动调优阶段 1:提取可调优任务
+ tasks = autotvm.task.extract_from_program(
+ mod, target=test_target, target_host=target, params=params
+ )
+
+ # 自动调优阶段 2:定义调优配置
+ tmp_log_file = tune_log + ".tmp"
+ measure_option = autotvm.measure_option(
+ builder=autotvm.LocalBuilder(
+ build_func=ndk.create_shared, timeout=15
+ ), # 在本地构建测试内核
+ runner=autotvm.RPCRunner( # 运行程序将在远程设备上运行。
+ key, # RPC 密钥
+ host=rpc_tracker_host, # 追踪主机
+ port=int(rpc_tracker_port), # 追踪端口
+ number=3, # 平均运行次数
+ timeout=600, # RPC 超时
+ ),
+ )
+ n_trial = 1024 # 在选择最佳内核配置之前进行训练的迭代次数
+ early_stopping = False # 可以启用以在损失不断最小化时停止调优。
+
+ # 自动调优阶段 3:遍历任务并进行调优。
+ from tvm.autotvm.tuner import XGBTuner
+
+ for i, tsk in enumerate(reversed(tasks[:3])):
+ print("Task:", tsk)
+ prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
+
+ # 选择调谐器
+ tuner = "xgb"
+
+ # 创建调谐器
+ if tuner == "xgb":
+ tuner_obj = XGBTuner(tsk, loss_type="reg")
+ # 其他调谐器类型的判断可以在此处添加
+
+ tsk_trial = min(n_trial, len(tsk.config_space))
+ tuner_obj.tune(
+ n_trial=tsk_trial,
+ early_stopping=early_stopping,
+ measure_option=measure_option,
+ callbacks=[
+ autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
+ autotvm.callback.log_to_file(tmp_log_file),
+ ],
+ )
+ # 自动调优阶段 4:从整体日志中选择性能最佳的配置。
+ autotvm.record.pick_best(tmp_log_file, tune_log)
+
+```
+
+## 启用 OpenCLML 卸载
+OpenCLML 卸载将尝试通过使用 OpenCLML 专有运算符库来加速支持的运算符。
+
+```python
+# 默认情况下,在上面的配置部分,:code:enable_clml 被设置为 False。
+
+if not local_demo and enable_clml:
+ mod = clml.partition_for_clml(mod, params)
+
+```
+
+## 编译
+如果存在调优缓存,则使用调优缓存。
+
+```python
+if os.path.exists(tune_log):
+ with autotvm.apply_history_best(tune_log):
+ with tvm.transform.PassContext(opt_level=3):
+ lib = relay.build(mod, target=target, params=params)
+else:
+ with tvm.transform.PassContext(opt_level=3):
+ lib = relay.build(mod, target=target, params=params)
+
+```
+
+## 远程通过 RPC 部署模型
+使用 RPC,您可以将模型从主机机器部署到远程 Adreno 设备。
+
+```python
+if local_demo:
+ remote = rpc.LocalSession()
+else:
+ tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
+ # 运行大模型时, 应该增加 `session_timeout`
+ remote = tracker.request(key, priority=0, session_timeout=60)
+
+if local_demo:
+ dev = remote.cpu(0)
+elif test_target.find("opencl"):
+ dev = remote.cl(0)
+else:
+ dev = remote.cpu(0)
+
+temp = utils.tempdir()
+dso_binary = "dev_lib_cl.so"
+dso_binary_path = temp.relpath(dso_binary)
+fcompile = ndk.create_shared if not local_demo else None
+lib.export_library(dso_binary_path, fcompile=fcompile)
+remote_path = "/data/local/tmp/" + dso_binary
+remote.upload(dso_binary_path)
+rlib = remote.load_module(dso_binary)
+m = graph_executor.GraphModule(rlib["default"](dev))
+
+```
+
+## 运行推理
+我们现在可以设置输入,推理我们的模型并得到输出预测。
+
+```python
+m.set_input(input_name, tvm.nd.array(img.astype("float32")))
+m.run()
+tvm_output = m.get_output(0)
+```
+
+## 获取预测与性能统计
+这块代码展示了 top-1 和 top-5 预测,同时提供模型的性能信息。
+
+```python
+from os.path import join, isfile
+from matplotlib import pyplot as plt
+from tvm.contrib import download
+
+
+# 下载 ImageNet 分类
+categ_url = "https://github.com/uwsampl/web-data/raw/main/vta/models/"
+categ_fn = "synset.txt"
+download.download(join(categ_url, categ_fn), categ_fn)
+synset = eval(open(categ_fn).read())
+
+top_categories = np.argsort(tvm_output.asnumpy()[0])
+top5 = np.flip(top_categories, axis=0)[:5]
+
+# 记录 top-1 分类结果
+print("Top-1 id: {}, class name: {}".format(top5[1 - 1], synset[top5[1 - 1]]))
+
+# 记录 top-5 分类结果
+print("\nTop5 predictions: \n")
+print("\t#1:", synset[top5[1 - 1]])
+print("\t#2:", synset[top5[2 - 1]])
+print("\t#3:", synset[top5[3 - 1]])
+print("\t#4:", synset[top5[4 - 1]])
+print("\t#5:", synset[top5[5 - 1]])
+print("\t", top5)
+ImageNetClassifier = False
+for k in top_categories[-5:]:
+ if "cat" in synset[k]:
+ ImageNetClassifier = True
+assert ImageNetClassifier, "Failed ImageNet classifier validation check"
+
+print("Evaluate inference time cost...")
+print(m.benchmark(dev, number=1, repeat=10))
+```
+
+Out:
+```info
+/workspace/python/tvm/runtime/ndarray.py:199: DeprecationWarning: NDArray.asnumpy() will be deprecated in TVM v0.8 release. Please use NDArray.numpy() instead.
+ warnings.warn(
+Top-1 id: 281, class name: tabby, tabby cat
+
+Top5 predictions:
+
+ #1: tabby, tabby cat
+ #2: tiger cat
+ #3: lynx, catamount
+ #4: red fox, Vulpes vulpes
+ #5: Egyptian cat
+ [281 282 287 277 285]
+Evaluate inference time cost...
+Execution time summary:
+ mean (ms) median (ms) max (ms) min (ms) std (ms)
+ 3991.4967 3991.2103 3996.6988 3988.8485 2.0989
+```
+
+**该脚本的总运行时间:** ( 1 分 18.970 秒)
\ No newline at end of file
From 5ff68e40ed6ac09ead5b8e2370645166182d8c13 Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Thu, 30 Nov 2023 23:57:03 +0800
Subject: [PATCH 04/15] update deploy_models
---
docs/how_to/deploy/deploy_models/01-deploy_android.md | 6 +++---
docs/how_to/deploy/deploy_models/04-compile_od.md | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/docs/how_to/deploy/deploy_models/01-deploy_android.md b/docs/how_to/deploy/deploy_models/01-deploy_android.md
index 769b320d..d8a259fa 100644
--- a/docs/how_to/deploy/deploy_models/01-deploy_android.md
+++ b/docs/how_to/deploy/deploy_models/01-deploy_android.md
@@ -107,11 +107,11 @@ endif
# 要添加的其他 include 头,例如 SDK_PATH/adrenosdk/Development/Inc
ADD_C_INCLUDES += /work/adrenosdk-linux-5_0/Development/Inc
-# 从 https://github.com/KhronosGroup/OpenCL-Headers 下载
-ADD_C_INCLUDES += /usr/local/OpenCL-Headers/
+
+ADD_C_INCLUDES =
# 要添加的附加链接库,例如 ANDROID_LIB_PATH/libOpenCL.so
-ADD_LDLIBS = /workspace/pull-from-android-device/libOpenCL.so
+ADD_LDLIBS =
```
:::note
diff --git a/docs/how_to/deploy/deploy_models/04-compile_od.md b/docs/how_to/deploy/deploy_models/04-compile_od.md
index 040a89af..d804d170 100644
--- a/docs/how_to/deploy/deploy_models/04-compile_od.md
+++ b/docs/how_to/deploy/deploy_models/04-compile_od.md
@@ -15,8 +15,8 @@ title: 编译 PyTorch 目标检测模型
可通过 pip 快速安装:
``` bash
-pip install torch==1.7.0
-pip install torchvision==0.8.1
+pip install torch
+pip install torchvision
```
或参考官网:https://pytorch.org/get-started/locally/
From f01c813d3cbe37ef41b2ac2ff7459efaa1ca7a3b Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:02:21 +0800
Subject: [PATCH 05/15] delete deploy_ssd and update index
---
.../deploy/deploy_models/09-deploy_ssd.md | 162 ------------------
docs/how_to/deploy/deploy_models/index.md | 3 +-
2 files changed, 2 insertions(+), 163 deletions(-)
delete mode 100644 docs/how_to/deploy/deploy_models/09-deploy_ssd.md
diff --git a/docs/how_to/deploy/deploy_models/09-deploy_ssd.md b/docs/how_to/deploy/deploy_models/09-deploy_ssd.md
deleted file mode 100644
index eee3d9df..00000000
--- a/docs/how_to/deploy/deploy_models/09-deploy_ssd.md
+++ /dev/null
@@ -1,162 +0,0 @@
----
-title: 部署 Single Shot Multibox Detector(SSD)模型
----
-
-# 部署 Single Shot Multibox Detector(SSD)模型
-
-:::note
-单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_ssd_gluoncv.html#sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py) 下载完整的示例代码
-:::
-
-**作者**:[Yao Wang](https://github.com/kevinthesun),[Leyuan Wang](https://github.com/Laurawly)
-
-本文介绍如何用 TVM 部署 SSD 模型。这里将使用 GluonCV 预训练的 SSD 模型,并将其转换为 Relay IR。
-
-``` python
-import tvm
-from tvm import te
-
-from matplotlib import pyplot as plt
-from tvm import relay
-from tvm.contrib import graph_executor
-from tvm.contrib.download import download_testdata
-from gluoncv import model_zoo, data, utils
-```
-
-输出结果:
-
-``` bash
-/usr/local/lib/python3.7/dist-packages/gluoncv/__init__.py:40: UserWarning: Both `mxnet==1.6.0` and `torch==1.11.0+cpu` are installed. You might encounter increased GPU memory footprint if both framework are used at the same time.
- warnings.warn(f'Both `mxnet=={mx.__version__}` and `torch=={torch.__version__}` are installed. '
-```
-
-## 初步参数设置
-
-:::note
-现在支持在 CPU 和 GPU 上编译 SSD。
-
-为取得 CPU 上的最佳推理性能,需要根据设备修改 target 参数——对于 x86 CPU:参考 [为 x86 CPU 自动调整卷积网络](/docs/how_to/autotune/autotuning_x86) 来调整;对于 arm CPU:参考 [为 ARM CPU 自动调整卷积网络](/docs/how_to/autotune/autotuning_arm) 来调整。
-
-为在 Intel 显卡上取得最佳推理性能,将 target 参数修改为 `opencl -device=intel_graphics` 。注意:在 Mac 上使用 Intel 显卡时,target 要设置为 `opencl` ,因为 Mac 上不支持 Intel 子组扩展。
-
-为取得基于 CUDA 的 GPU 上的最佳推理性能,将 target 参数修改为 `cuda`;对于基于 OPENCL 的 GPU,将 target 参数修改为 `opencl`,然后根据设备来修改设备参数。
-:::
-
-``` python
-supported_model = [
- "ssd_512_resnet50_v1_voc",
- "ssd_512_resnet50_v1_coco",
- "ssd_512_resnet101_v2_voc",
- "ssd_512_mobilenet1.0_voc",
- "ssd_512_mobilenet1.0_coco",
- "ssd_300_vgg16_atrous_voc" "ssd_512_vgg16_atrous_coco",
-]
-
-model_name = supported_model[0]
-dshape = (1, 3, 512, 512)
-```
-
-下载并预处理 demo 图像:
-
-``` python
-im_fname = download_testdata(
- "https://github.com/dmlc/web-data/blob/main/" + "gluoncv/detection/street_small.jpg?raw=true",
- "street_small.jpg",
- module="data",
-)
-x, img = data.transforms.presets.ssd.load_test(im_fname, short=512)
-```
-
-为 CPU 转换和编译模型:
-
-``` python
-block = model_zoo.get_model(model_name, pretrained=True)
-
-def build(target):
- mod, params = relay.frontend.from_mxnet(block, {"data": dshape})
- with tvm.transform.PassContext(opt_level=3):
- lib = relay.build(mod, target, params=params)
- return lib
-```
-
-输出结果:
-
-``` bash
-/usr/local/lib/python3.7/dist-packages/mxnet/gluon/block.py:1389: UserWarning: Cannot decide type for the following arguments. Consider providing them as input:
- data: None
- input_sym_arg_type = in_param.infer_type()[0]
-Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
- 0%| | 0/132723 [00:00, ?KB/s]
- 2%|1 | 2429/132723 [00:00<00:05, 24288.69KB/s]
- 8%|8 | 10888/132723 [00:00<00:02, 59757.20KB/s]
- 14%|#4 | 18798/132723 [00:00<00:01, 68586.60KB/s]
- 21%|## | 27307/132723 [00:00<00:01, 75099.17KB/s]
- 27%|##7 | 35836/132723 [00:00<00:01, 78765.91KB/s]
- 33%|###3 | 44460/132723 [00:00<00:01, 81298.98KB/s]
- 40%|###9 | 53075/132723 [00:00<00:00, 82882.32KB/s]
- 46%|####6 | 61612/132723 [00:00<00:00, 83671.87KB/s]
- 53%|#####2 | 69980/132723 [00:00<00:00, 82355.51KB/s]
- 59%|#####9 | 78462/132723 [00:01<00:00, 83105.52KB/s]
- 65%|######5 | 86777/132723 [00:01<00:00, 79179.66KB/s]
- 72%|#######1 | 95291/132723 [00:01<00:00, 80915.06KB/s]
- 78%|#######7 | 103417/132723 [00:01<00:00, 62776.56KB/s]
- 84%|########4 | 111967/132723 [00:01<00:00, 68364.35KB/s]
- 90%|########9 | 119368/132723 [00:01<00:00, 44237.04KB/s]
- 96%|#########6| 127829/132723 [00:01<00:00, 51926.12KB/s]
-100%|##########| 132723/132723 [00:02<00:00, 64946.94KB/s]
-```
-
-创建 TVM runtime,并进行推理,注意:
-
-``` text
-Use target = "cuda -libs" to enable thrust based sort, if you
-enabled thrust during cmake by -DUSE_THRUST=ON.
-```
-
-``` python
-def run(lib, dev):
- # 构建 TVM runtime
- m = graph_executor.GraphModule(lib["default"](dev))
- tvm_input = tvm.nd.array(x.asnumpy(), device=dev)
- m.set_input("data", tvm_input)
- # 执行
- m.run()
- # 得到输出
- class_IDs, scores, bounding_boxs = m.get_output(0), m.get_output(1), m.get_output(2)
- return class_IDs, scores, bounding_boxs
-
-for target in ["llvm", "cuda"]:
- dev = tvm.device(target, 0)
- if dev.exist:
- lib = build(target)
- class_IDs, scores, bounding_boxs = run(lib, dev)
-```
-
-输出结果:
-
-``` bash
-/workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
- "target_host parameter is going to be deprecated. "
-```
-
-显示结果:
-
-``` python
-ax = utils.viz.plot_bbox(
- img,
- bounding_boxs.numpy()[0],
- scores.numpy()[0],
- class_IDs.numpy()[0],
- class_names=block.classes,
-)
-plt.show()
-```
-
- ![图片](https://tvm.apache.org/docs/_images/sphx_glr_deploy_ssd_gluoncv_001.png)
-
-**脚本总运行时长:**( 2 分 32.231 秒)
-
-[下载 Python 源代码:deploy_ssd_gluoncv.py](https://tvm.apache.org/docs/_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py)
-
-[下载 Jupyter Notebook:deploy_ssd_gluoncv.ipynb](https://tvm.apache.org/docs/_downloads/d92aacfae35477bed0f7f60aa8d2714e/deploy_ssd_gluoncv.ipynb)
diff --git a/docs/how_to/deploy/deploy_models/index.md b/docs/how_to/deploy/deploy_models/index.md
index 8a97d9f2..60c3eb04 100644
--- a/docs/how_to/deploy/deploy_models/index.md
+++ b/docs/how_to/deploy/deploy_models/index.md
@@ -7,6 +7,8 @@ title: 部署深度学习模型
TVM 可将模型部署到各种不同的平台。以下操作指南描述了如何准备模型,并将其部署到多种支持的后端。
* [在 Android 上部署预训练模型](deploy_android)
+* [在 Adreno™ 上部署预训练模型](deploy_adreno)
+* [使用 tvmc 接口在 Adreno™ 上部署预训练模型](deploy_adreno_tvmc)
* [在 Jetson Nano 上部署预训练模型](deploy_nano)
* [在树莓派上部署预训练模型](deploy_pi)
* [编译 PyTorch 对象检测模型](compile_od)
@@ -14,4 +16,3 @@ TVM 可将模型部署到各种不同的平台。以下操作指南描述了如
* [使用 TVM 部署框架预量化模型 - 第 3 部分(TFLite)](deploy_prequan_3)
* [在 CUDA 上部署量化模型](deploy_quan)
* [在 CPU 上部署 Hugging Face 剪枝模型](hugging_face)
-* [部署 Single Shot Multibox Detector(SSD)模型](deploy_ssd)
From 466a70581d314273d80951f5f537aa995c5574dd Mon Sep 17 00:00:00 2001
From: Anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:25:26 +0800
Subject: [PATCH 06/15] Update
docs/how_to/deploy/deploy_models/10-depoly_adreno.md
Co-authored-by: sparanoid
---
docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
index d4a97d43..6bf7e75b 100644
--- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
+++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
@@ -10,7 +10,7 @@ title: 在 Adreno™ 上部署预训练模型
本文是一个逐步教程,演示如何在 Adreno 上(不同精度)部署预训练的 PyTorch ResNet-18 模型。
-首先,我们需要安装 PyTorch 与 TorchVision ,因为我们将使用它作为我们的模型库。
+首先,我们需要安装 PyTorch 与 TorchVision,因为我们将使用它作为我们的模型库。
可以通过 pip 快速安装:
From c2d8c7849f0bc1e32659ad00fecffe96d3956eeb Mon Sep 17 00:00:00 2001
From: Anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:25:34 +0800
Subject: [PATCH 07/15] Update
docs/how_to/deploy/deploy_models/10-depoly_adreno.md
Co-authored-by: sparanoid
---
docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
index 6bf7e75b..a66a3205 100644
--- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
+++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
@@ -23,7 +23,7 @@ pip install torchvision
[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
-在构建部分之后,构建目录中应该有两个文件: “libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。
+在构建部分之后,构建目录中应该有两个文件:“libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。
## TVM RPC 服务器
From 0ab1365ead428e87f1d126d4dc4269042b78ba86 Mon Sep 17 00:00:00 2001
From: Anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:25:41 +0800
Subject: [PATCH 08/15] Update
docs/how_to/deploy/deploy_models/10-depoly_adreno.md
Co-authored-by: sparanoid
---
docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
index a66a3205..1ee01b0a 100644
--- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
+++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
@@ -208,7 +208,7 @@ img = np.expand_dims(img, 0)
![cat](https://tvm.apache.org/docs/_images/sphx_glr_from_keras_001.png)
## 将 PyTorch 模型转换为 Relay 模块
-TVM 具有用于各种框架 的在 relay.frontend 中的前端 API 。现在对于 PyTorch 模型导入,我们有 relay.frontend.from_pytorch API 。输入名称可以是任意的
+TVM 具有用于各种框架 的在 relay.frontend 中的前端 API。现在对于 PyTorch 模型导入,我们有 relay.frontend.from_pytorch API。输入名称可以是任意的
```python
input_name = "input0"
From c8a480aac2180503a1fca7335bf877cf2d21cc36 Mon Sep 17 00:00:00 2001
From: Anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:25:49 +0800
Subject: [PATCH 09/15] Update
docs/how_to/deploy/deploy_models/10-depoly_adreno.md
Co-authored-by: sparanoid
---
docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
index 1ee01b0a..dd523854 100644
--- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
+++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
@@ -253,7 +253,7 @@ mod = apply_graph_transforms(
```
-正如您在 IR 中所看到的那样,该架构现在包含强制转换操作,这些操作是为了将精度转换为 FP16 。您还可以使用 "float16" 或 "float32" 作为其他 dtype 选项。
+正如您在 IR 中所看到的那样,该架构现在包含强制转换操作,这些操作是为了将精度转换为 FP16。您还可以使用 "float16" 或 "float32" 作为其他 dtype 选项。
## 准备 TVM 目标
From 11b6c1f1ae10db49b21d80239e984dd528d5a6fa Mon Sep 17 00:00:00 2001
From: Anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:25:55 +0800
Subject: [PATCH 10/15] Update
docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
Co-authored-by: sparanoid
---
docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
index 74b3a9cb..bcf7a111 100644
--- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
+++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
@@ -14,7 +14,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型
单击 [此处](https://tvm.apache.org/docs/how_to/deploy_models/deploy_model_on_adreno_tvmc.html#sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-tvmc-py) 下载完整的示例代码
:::
-**作者**: Siva Rama Krishna
+**作者**:Siva Rama Krishna
本文是一篇关于在 Adreno™ 上部署预训练 Keras resnet50 模型的逐步教程。
From b1918d69752f34f7592ec38c35eccbeed5eb249f Mon Sep 17 00:00:00 2001
From: Anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:26:49 +0800
Subject: [PATCH 11/15] Update
docs/how_to/deploy/deploy_models/10-depoly_adreno.md
Co-authored-by: sparanoid
---
docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
index dd523854..347af753 100644
--- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
+++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
@@ -374,7 +374,7 @@ if local_demo:
remote = rpc.LocalSession()
else:
tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
- # 运行大模型时, 应该增加 `session_timeout`
+ # 运行大模型时,应该增加 `session_timeout`
remote = tracker.request(key, priority=0, session_timeout=60)
if local_demo:
From 8ced38074f2938b38d29749191dab4bab7464ce8 Mon Sep 17 00:00:00 2001
From: Anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 00:26:54 +0800
Subject: [PATCH 12/15] Update
docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
Co-authored-by: sparanoid
---
docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
index bcf7a111..0bf76986 100644
--- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
+++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
@@ -18,7 +18,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型
本文是一篇关于在 Adreno™ 上部署预训练 Keras resnet50 模型的逐步教程。
-此外,您应该已经为 Android 构建了 TVM 。请参阅以下说明,了解如何构建它并设置 RPC 环境。
+此外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它并设置 RPC 环境。
[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
From b4a0e9b75de279ba0b684bec5236f8c56a7f736f Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 15:18:35 +0800
Subject: [PATCH 13/15] delete
# 使用 tvmc 接口在 Adreno™ 上部署预训练模型
@@ -72,7 +66,7 @@ model.save(model_file_name)
```
Out:
-```info
+```info
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
8192/102967424 [..............................] - ETA: 0s
From acc615ffed1d0b0cb7cbbc0f4b8ec90422fd83d3 Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 16:12:00 +0800
Subject: [PATCH 14/15] fix broken links
---
docs/how_to/deploy/deploy_models/10-depoly_adreno.md | 2 +-
docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
index 347af753..14bf92ea 100644
--- a/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
+++ b/docs/how_to/deploy/deploy_models/10-depoly_adreno.md
@@ -21,7 +21,7 @@ pip install torchvision
除此之外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它。
-[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
+[在 Adreno GPU 上部署](https://tvm.apache.org/docs/v0.13.0/how_to/deploy/adreno.html)
在构建部分之后,构建目录中应该有两个文件:“libtvm_runtime.so” 和 “tvm_rpc”。让我们将它们推送到设备上并运行 TVM RPC 服务器。
diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
index c6948258..7fa35681 100644
--- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
+++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
@@ -401,7 +401,7 @@ def @main(%input_2: Tensor[(1, 224, 224, 3), float32], %v_param_1: Tensor[(7, 7,
```
# 自动调优
-现在,可以使用下面的 api 为任何目标对模型进行自动调优。调整需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
+现在,可以使用下面的 API 为任何目标对模型进行自动调优。调优需要 RPC 设置,请参阅[在 Adreno GPU 上部署](https://tvm.apache.org/docs/v0.13.0/how_to/deploy/adreno.html)
```python
rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1")
From b605b02e9395b063e9b543d2113e944ffdb29451 Mon Sep 17 00:00:00 2001
From: anleeos <2937160075@qq.com>
Date: Fri, 1 Dec 2023 16:34:23 +0800
Subject: [PATCH 15/15] fix broken link
---
docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md | 2 +-
docs/how_to/deploy/deploy_models/index.md | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
index 7fa35681..16206c81 100644
--- a/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
+++ b/docs/how_to/deploy/deploy_models/11-depoly_adreno_tvmc.md
@@ -14,7 +14,7 @@ title: 使用 tvmc 接口在 Adreno™ 上部署预训练模型
此外,您应该已经为 Android 构建了 TVM。请参阅以下说明,了解如何构建它并设置 RPC 环境。
-[在 Adreno GPU 上部署](https://tvm.hyper.ai/docs/how_to/deploy/deploy_adreno)
+[在 Adreno GPU 上部署](https://tvm.apache.org/docs/v0.13.0/how_to/deploy/adreno.html)
```python
import os
diff --git a/docs/how_to/deploy/deploy_models/index.md b/docs/how_to/deploy/deploy_models/index.md
index 60c3eb04..74c8bd59 100644
--- a/docs/how_to/deploy/deploy_models/index.md
+++ b/docs/how_to/deploy/deploy_models/index.md
@@ -6,9 +6,9 @@ title: 部署深度学习模型
TVM 可将模型部署到各种不同的平台。以下操作指南描述了如何准备模型,并将其部署到多种支持的后端。
+* [在 Adreno™ 上部署预训练模型](https://tvm.apache.org/docs/v0.13.0/how_to/deploy_models/deploy_model_on_adreno.html#sphx-glr-how-to-deploy-models-deploy-model-on-adreno-py)
+* [使用 tvmc 接口在 Adreno™ 上部署预训练模型](https://tvm.apache.org/docs/v0.13.0/how_to/deploy_models/deploy_model_on_adreno_tvmc.html#sphx-glr-how-to-deploy-models-deploy-model-on-adreno-tvmc-py)
* [在 Android 上部署预训练模型](deploy_android)
-* [在 Adreno™ 上部署预训练模型](deploy_adreno)
-* [在 Adreno™ 上部署预训练模型](deploy_adreno_tvmc)
* [在 Jetson Nano 上部署预训练模型](deploy_nano)
* [在树莓派上部署预训练模型](deploy_pi)
* [编译 PyTorch 对象检测模型](compile_od)