From f1421976cbb9e0e6ca62fabb14270987f47ee22e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9B=81=E8=A1=8C?= Date: Fri, 22 Sep 2023 14:24:03 +0800 Subject: [PATCH] just test --- .github/workflows/android-build.yml | 38 ----------------------------- .github/workflows/build-test.yml | 18 +++++++------- script/download_model.ps1 | 15 +++++++----- script/download_model.sh | 12 ++++++++- src/llm.cpp | 9 +++++++ 5 files changed, 38 insertions(+), 54 deletions(-) delete mode 100755 .github/workflows/android-build.yml diff --git a/.github/workflows/android-build.yml b/.github/workflows/android-build.yml deleted file mode 100755 index 5a436b25..00000000 --- a/.github/workflows/android-build.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: android-build -on: - push: - branches: - - master - - 'feature/**' - paths: - - 'src/**' - - 'android/**' - - '.github/workflows/android-build.yml' - pull_request: - branches: [master] - paths: - - 'src/**' - - 'android/**' - - '.github/workflows/android-build.yml' - -concurrency: - group: android-${{ github.ref }} - cancel-in-progress: true -permissions: - contents: read - -jobs: - android_adb_build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: build - run: ./script/android_build.sh - - android_app_build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: build - run: ./script/android_app_build.sh - diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index f1ff7853..ad501d1b 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -22,7 +22,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [windows-latest] steps: - uses: actions/checkout@v3 @@ -55,8 +55,8 @@ jobs: PACAGE_FILE: ${{ matrix.os }}-package.zip strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - model: [chatglm-6b, chatglm2-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat] + 
os: [windows-latest] + model: [chatglm-6b] steps: - uses: actions/download-artifact@v3 @@ -70,16 +70,16 @@ unzip $PACAGE_FILE cd $PACAGE_DIR ./script/model_test.sh ${{ matrix.model }} - - name: windows-download + - name: windows-test if: matrix.os == 'windows-latest' run: | cd workspace 7z x windows-package.zip cd windows-package + ls echo ${{ matrix.model }} - ./script/model_download.ps1 ${{ matrix.model }} - - name: windows-test - if: matrix.os == 'windows-latest' - run: | + ./script/download_model.ps1 ${{ matrix.model }} + ls cd build - .\Release\cli_demo -m ..\${{ matrix.model }} \ No newline at end of file + .\Release\cli_demo -m ..\${{ matrix.model }} + ls \ No newline at end of file diff --git a/script/download_model.ps1 b/script/download_model.ps1 index 7dc0ce8f..64b11c83 100644 --- a/script/download_model.ps1 +++ b/script/download_model.ps1 @@ -3,10 +3,13 @@ param( ) mkdir $model cd $model -wget -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn -OutFile embedding.mnn -wget -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/lm.mnn -OutFile lm.mnn -for($i=1; $i -lt 32; $i=$i+1) -{ - wget -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/block_$i.mnn -OutFile block_$i.mnn +$block_num = 28 +if ($model.Contains('7b')) { + $block_num = 32 } -cd .. +Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn -OutFile embedding.mnn +Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/lm.mnn -OutFile lm.mnn +for ($i=0; $i -lt $block_num; $i=$i+1) { + Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/block_$i.mnn -OutFile block_$i.mnn +} +cd .. 
\ No newline at end of file diff --git a/script/download_model.sh b/script/download_model.sh index 3977f29b..5a8c5394 100755 --- a/script/download_model.sh +++ b/script/download_model.sh @@ -1,10 +1,20 @@ +if [ $# -lt 1 ]; then + echo 'Usage: ./download_model.sh $model' + exit 1 +fi + model=$1 mkdir $model cd $model +is_7b=`echo $model | grep '7b'` +block_num=27 +if [ $is_7b ]; then + block_num=31 +fi # download models wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/lm.mnn -for i in `seq 0 31` +for i in `seq 0 $block_num` do wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/block_$i.mnn done diff --git a/src/llm.cpp b/src/llm.cpp index a133f903..f7d1ed1f 100644 --- a/src/llm.cpp +++ b/src/llm.cpp @@ -94,12 +94,15 @@ Llm* Llm::createLLM(const std::string& path) { } std::string Llm::response(const std::string& query, std::ostream* os) { + printf("response\n"); // init status if (is_single_) { key_value_shape_.insert(key_value_shape_.begin(), layer_nums_); past_key_values_.push_back(_Input(key_value_shape_, NCHW)); } else { for (int i = 0; i < layer_nums_; i++) { + printf("past_key_values_ %d\n", i); + fflush(stdout); past_key_values_.push_back(_Input(key_value_shape_, NCHW)); } } @@ -225,6 +228,7 @@ int Llm::forward(const std::vector& input_ids) { } all_seq_len_ += seq_len; gen_seq_len_++; + printf("id = %d\n", id); return id; } @@ -250,6 +254,7 @@ VARP Llm::gen_embedding(const std::vector& input_ids) { } std::vector Llm::tokenizer_encode(std::string input_str) { + printf("tokenizer_encode start\n"); std::vector ids; std::vector words; std::string dict_path = tokenizer_dir_ + "/jieba.dict.utf8"; @@ -257,6 +262,7 @@ std::vector Llm::tokenizer_encode(std::string input_str) { std::string user_dict_path = tokenizer_dir_ + "/user.dict.utf8"; std::string idf_path = tokenizer_dir_ + "/idf.utf8"; std::string stopWord_path = 
tokenizer_dir_ + "/stop_words.utf8"; + printf("jieba init\n"); cppjieba::Jieba jieba( dict_path, model_path, @@ -264,8 +270,10 @@ std::vector Llm::tokenizer_encode(std::string input_str) { idf_path, stopWord_path ); + printf("jieba Cut\n"); jieba.Cut(input_str, words, true); for (auto word : words) { + printf("word = %s\n", word.c_str()); const auto& iter = word_encoder_.find(word); if (iter != word_encoder_.end()) { ids.push_back(iter->second); @@ -290,6 +298,7 @@ std::vector Chatglm_6b::tokenizer(const std::string& query) { context_len_ = ids.size(); ids.push_back(130001); ids.push_back(130004); + printf("ids = ["); for (auto id : ids) printf("%d, ", id); printf("]\n"); return ids; }