From 21fcc204551386143948f1fa21fde876c8fd1b5a Mon Sep 17 00:00:00 2001 From: Luca Soldaini Date: Thu, 24 Oct 2024 11:00:57 -0700 Subject: [PATCH] reduce prefetch factor from 16 to 8 --- classifiers/scripts/fineweb_100b.sh | 2 +- classifiers/scripts/fineweb_50b_extra.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/classifiers/scripts/fineweb_100b.sh b/classifiers/scripts/fineweb_100b.sh index ab9de247..224e011f 100644 --- a/classifiers/scripts/fineweb_100b.sh +++ b/classifiers/scripts/fineweb_100b.sh @@ -42,4 +42,4 @@ gantry run \ --shared-memory 10GiB \ --install "pip install -e classifiers/" \ --yes \ - -- /bin/bash -c "huggingface-cli download ${MODEL_NAME} && torchrun --nnodes "${NUM_NODES}:${NUM_NODES}" --nproc-per-node 8 --rdzv_id 12347 --rdzv_backend static --rdzv_endpoint "\${BEAKER_LEADER_REPLICA_HOSTNAME}:29400" --node_rank "\${BEAKER_REPLICA_RANK}" --rdzv_conf 'read_timeout=420' -m dolma_classifiers.inference --source-prefix ${DOCUMENTS} --batch-size ${BATCH_SIZE} --use-wandb --wandb-project 'dolma-classifiers' --wandb-entity ai2-llm --model-name ${MODEL_NAME} --num-workers 8 --prefetch-factor 16" + -- /bin/bash -c "huggingface-cli download ${MODEL_NAME} && torchrun --nnodes "${NUM_NODES}:${NUM_NODES}" --nproc-per-node 8 --rdzv_id 12347 --rdzv_backend static --rdzv_endpoint "\${BEAKER_LEADER_REPLICA_HOSTNAME}:29400" --node_rank "\${BEAKER_REPLICA_RANK}" --rdzv_conf 'read_timeout=420' -m dolma_classifiers.inference --source-prefix ${DOCUMENTS} --batch-size ${BATCH_SIZE} --use-wandb --wandb-project 'dolma-classifiers' --wandb-entity ai2-llm --model-name ${MODEL_NAME} --num-workers 8 --prefetch-factor 8" diff --git a/classifiers/scripts/fineweb_50b_extra.sh b/classifiers/scripts/fineweb_50b_extra.sh index d349fb00..5017bf98 100644 --- a/classifiers/scripts/fineweb_50b_extra.sh +++ b/classifiers/scripts/fineweb_50b_extra.sh @@ -42,4 +42,4 @@ gantry run \ --shared-memory 10GiB \ --install "pip install -e classifiers/" \ --yes \ - -- /bin/bash -c 
"huggingface-cli download ${MODEL_NAME} && torchrun --nnodes "${NUM_NODES}:${NUM_NODES}" --nproc-per-node 8 --rdzv_id 12347 --rdzv_backend static --rdzv_endpoint "\${BEAKER_LEADER_REPLICA_HOSTNAME}:29400" --node_rank "\${BEAKER_REPLICA_RANK}" --rdzv_conf 'read_timeout=420' -m dolma_classifiers.inference --source-prefix ${DOCUMENTS} --batch-size ${BATCH_SIZE} --use-wandb --wandb-project 'dolma-classifiers' --wandb-entity ai2-llm --model-name ${MODEL_NAME} --num-workers 8 --prefetch-factor 16" + -- /bin/bash -c "huggingface-cli download ${MODEL_NAME} && torchrun --nnodes "${NUM_NODES}:${NUM_NODES}" --nproc-per-node 8 --rdzv_id 12347 --rdzv_backend static --rdzv_endpoint "\${BEAKER_LEADER_REPLICA_HOSTNAME}:29400" --node_rank "\${BEAKER_REPLICA_RANK}" --rdzv_conf 'read_timeout=420' -m dolma_classifiers.inference --source-prefix ${DOCUMENTS} --batch-size ${BATCH_SIZE} --use-wandb --wandb-project 'dolma-classifiers' --wandb-entity ai2-llm --model-name ${MODEL_NAME} --num-workers 8 --prefetch-factor 8"