This repository has been archived by the owner on Oct 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 211
71 lines (60 loc) · 3.94 KB
/
chatbot-finetune-mpt-7b-chat.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Reusable workflow: fine-tunes mosaicml/mpt-7b-chat across two Docker
# containers as a CI check. It has no triggers of its own and runs only
# when another workflow invokes it via `workflow_call`.
name: Chatbot finetune on mosaicml/mpt-7b-chat

on:
  workflow_call:

# One run per workflow + pull request (or ref, when there is no PR number);
# a newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-ft-mpt-7b
  cancel-in-progress: true

# The job only needs to read the repository contents.
permissions:
  contents: read
jobs:
  # Builds the chatbot fine-tuning image if it is missing, starts two
  # containers from it, runs the MPI fine-tuning across both, prints the
  # logs, and removes the containers afterwards.
  finetuning:
    name: finetuning test
    runs-on: lms-lab
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Appends the runner-local .env file to $GITHUB_ENV so its entries are
      # visible as `env.*` in later steps (presumably the source of CACHE and
      # the *_PROXY_* values used below -- confirm against the runner setup).
      - name: Load environment variables
        run: cat ~/itrex-actions-runner/.env >> "$GITHUB_ENV"

      # Copies the cached torch artifacts into the workspace root, which is
      # also the Docker build context of the next step.
      - name: Prepare Cache
        run: cp ${{ env.CACHE }}/torch/* ./

      # Builds the image only when no local image matches
      # "chatbotfinetune-mpi"; after a successful build, prunes stopped
      # containers and dangling images.
      - name: Build Docker Image
        run: |
          if [[ $(docker images | grep chatbotfinetune-mpi | wc -l) == 0 ]]; then
            docker build ./ --target cpu \
              --build-arg REPO=${{ github.server_url }}/${{ github.repository }}.git \
              --build-arg REPO_PATH="." \
              --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" \
              --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" \
              -f intel_extension_for_transformers/neural_chat/docker/Dockerfile \
              -t chatbotfinetune-mpi:latest \
              && yes | docker container prune && yes | docker image prune
          fi

      # Starts the first container and publishes its IP address as the
      # `master_node` step output.
      # NOTE(review): numactl wraps the docker CLI process here; the
      # container's processes are started by the Docker daemon and may not
      # inherit this binding. If pinning to the socket is required, consider
      # `docker run --cpuset-cpus/--cpuset-mems` -- confirm on the runner.
      - name: Start Docker Container on socket 0
        id: master_container
        run: |
          # -a: also match exited containers; a stopped leftover with the
          # same name would otherwise make `docker run --name` fail.
          cid=$(docker ps -aq --filter "name=chatbotfinetune-mpi-s0")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi
          numactl --cpunodebind=0 -- docker run -tid \
            -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub \
            -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" \
            -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" \
            --name="chatbotfinetune-mpi-s0" \
            --hostname="chatbotfinetune-container-mpi-s0" \
            chatbotfinetune-mpi:latest
          master=$(docker inspect -f "{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}" "chatbotfinetune-mpi-s0")
          echo "master_node=$master" >> "$GITHUB_OUTPUT"

      # Same as the previous step for the second container; publishes its IP
      # address as the `slave_node` step output.
      - name: Start Docker Container on socket 1
        id: slave_container
        run: |
          # -a: also match exited containers (see the socket 0 step).
          cid=$(docker ps -aq --filter "name=chatbotfinetune-mpi-s1")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi
          numactl --cpunodebind=1 -- docker run -tid \
            -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub \
            -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" \
            -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" \
            --name="chatbotfinetune-mpi-s1" \
            --hostname="chatbotfinetune-container-mpi-s1" \
            chatbotfinetune-mpi:latest
          slave=$(docker inspect -f "{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}" "chatbotfinetune-mpi-s1")
          echo "slave_node=$slave" >> "$GITHUB_OUTPUT"

      # Runs the prepare script with both container IPs, reinstalls the
      # package from source inside each container, then launches the
      # fine-tuning script through mpirun from the first container
      # (2 ranks, 1 per host, hosts taken from ./hosts2).
      - name: Run Finetuning
        run: |
          sh .github/workflows/script/chatbot/prepare_ft_mpt-7b-chat_mpi.sh ${{ steps.master_container.outputs.master_node }} ${{ steps.slave_container.outputs.slave_node }}
          docker exec "chatbotfinetune-mpi-s0" bash -c "cd /intel-extension-for-transformers && pip uninstall intel-extension-for-transformers -y && python setup.py install"
          docker exec "chatbotfinetune-mpi-s1" bash -c "cd /intel-extension-for-transformers && pip uninstall intel-extension-for-transformers -y && python setup.py install"
          docker exec "chatbotfinetune-mpi-s0" bash -c "cd /intel-extension-for-transformers; source ./bash_setup.sh; mpirun -f ./hosts2 -n 2 -ppn 1 -genv OMP_NUM_THREADS=48 sh .github/workflows/script/chatbot/start_ft_mpt-7b-chat_mpi.sh"

      # Runs whether the previous steps passed or failed (not on cancel).
      - name: Print Logs and Check Finetuning Status
        if: success() || failure()
        run: |
          sh .github/workflows/script/chatbot/finish_ft_mpt-7b-chat_mpi.sh

      # Cleanup must also run when the job is cancelled (this workflow sets
      # cancel-in-progress), otherwise both containers are left behind.
      - name: Stop Container
        if: always()
        run: |
          cid=$(docker ps -aq --filter "name=chatbotfinetune-mpi-s0")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi
          cid=$(docker ps -aq --filter "name=chatbotfinetune-mpi-s1")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi

      # No `if:` condition, so this runs only when every earlier step
      # succeeded.
      - name: Test Summary
        run: echo "Finetuning completed successfully"