
Commit 5212876

remove data collator
Signed-off-by: Sertac Ozercan <[email protected]>
sozercan committed Oct 2, 2024
1 parent a7c8b1d commit 5212876
Showing 2 changed files with 1 addition and 2 deletions.
2 changes: 1 addition & 1 deletion pkg/aikit2llb/finetune/convert.go
@@ -46,7 +46,7 @@ func Aikit2LLB(c *config.FineTuneConfig) llb.State {
 	if c.Target == utils.TargetUnsloth {
 		// installing unsloth and its dependencies
 		// uv does not support installing xformers via unsloth pyproject
-		state = state.Run(utils.Shf("pip install --upgrade pip uv && uv venv --system-site-packages && %[1]s && uv pip install --upgrade --force-reinstall packaging torch==2.3.0 ipython ninja packaging bitsandbytes setuptools==69.5.1 wheel psutil transformers==4.44.2 numpy==2.0.2 && uv pip install flash-attn --no-build-isolation && python -m pip install 'unsloth[cu121_ampere_torch230] @ git+https://github.com/unslothai/unsloth.git@%[2]s'", sourceVenv, unslothCommitOrTag)).Root()
+		state = state.Run(utils.Shf("pip install --upgrade pip uv && uv venv --system-site-packages && %[1]s && uv pip install --upgrade --force-reinstall packaging torch==2.4.0 ipython ninja packaging bitsandbytes setuptools==69.5.1 wheel psutil transformers==4.44.2 numpy==2.0.2 && uv pip install flash-attn --no-build-isolation && python -m pip install 'unsloth[cu121_ampere_torch240] @ git+https://github.com/unslothai/unsloth.git@%[2]s'", sourceVenv, unslothCommitOrTag)).Root()

 		version := version.Version
 		if version == "" {
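
The two edits in this hunk travel together: the unsloth extra name encodes the CUDA release, GPU generation, and torch version it targets (cu121 / ampere / torch240), so bumping to torch==2.4.0 without also switching the extra from cu121_ampere_torch230 would install mismatched wheels. A hypothetical sanity check (not part of this commit) that the resulting venv ended up with the expected torch pin:

import torch

# Hypothetical guard: fail fast if the installed torch version drifted
# from what the unsloth extra (cu121_ampere_torch240) was built against.
expected = "2.4.0"
if not torch.__version__.startswith(expected):
    raise RuntimeError(
        f"torch {torch.__version__} installed, but the unsloth extra expects {expected}"
    )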
1 change: 0 additions & 1 deletion pkg/finetune/target_unsloth.py
@@ -80,7 +80,6 @@ def formatting_prompts_func(examples):
     train_dataset=dataset,
     dataset_text_field="text",
     max_seq_length=max_seq_length,
-    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
     tokenizer=tokenizer,
     dataset_num_proc = 2,
     packing = cfg.get('packing'), # Can make training 5x faster for short sequences.
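
Context for the deletion: when no data_collator is passed, trl's SFTTrainer falls back to its own default collator derived from the tokenizer, so dropping the explicit DataCollatorForSeq2Seq leaves collation to the trainer. A minimal sketch of the resulting call shape; model, tokenizer, dataset, max_seq_length, and cfg are assumed to be defined earlier in target_unsloth.py, and training arguments are omitted:

from trl import SFTTrainer

# Sketch only: model, tokenizer, dataset, max_seq_length, and cfg come
# from earlier in the script, as in the surrounding diff context.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,            # trainer derives its default collator from this
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=cfg.get('packing'),     # can make training 5x faster for short sequences
    # data_collator intentionally omitted: SFTTrainer supplies a default
)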
