Added test llama-2-7b with GPTQ quant. scheme #141

Merged · 9 commits · Mar 21, 2024
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_cuda_pytorch.yaml
@@ -49,4 +49,4 @@ jobs:
           --workdir /workspace/optimum-benchmark
           --entrypoint /bin/bash
           opt-bench-cuda:${{ matrix.image.cuda_version }}
-          -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,bitsandbytes,autoawq] && pytest -k 'cli and cuda and pytorch' -x"
+          -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,bitsandbytes,autoawq,auto-gptq-${{ matrix.image.torch_cuda }}] && pytest -k 'cli and cuda and pytorch' -x"
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_rocm_pytorch.yaml
@@ -1,3 +1,4 @@
+
 name: CLI ROCm Pytorch Tests

 on:
@@ -51,4 +52,4 @@ jobs:
           --device /dev/dri/renderD129
           --entrypoint /bin/bash
           opt-bench-rocm:${{ matrix.image.rocm_version }}
-          -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq] && pytest -k 'cli and cuda and pytorch and not bnb' -x"
+          -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq] && pytest -k 'cli and cuda and pytorch and not bnb and not gptq' -x"
4 changes: 2 additions & 2 deletions Makefile
@@ -15,7 +15,7 @@ CLI_MISC_REQS := testing

 CLI_CUDA_ONNXRUNTIME_REQS := testing,timm,diffusers
 CLI_ROCM_ONNXRUNTIME_REQS := testing,timm,diffusers
-CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,bitsandbytes,autoawq
+CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,bitsandbytes,autoawq,auto-gptq-cu118
 CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,autoawq
 CLI_CPU_OPENVINO_REQS := testing,openvino,timm,diffusers
 CLI_CPU_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft
@@ -143,7 +143,7 @@ test_cli_cuda_pytorch:
 	$(call test_nvidia,cuda,$(CLI_CUDA_PYTORCH_REQS),cli and cuda and pytorch)

 test_cli_rocm_pytorch:
-	$(call test_amdgpu,rocm,$(CLI_ROCM_PYTORCH_REQS),cli and cuda and pytorch and peft and not bnb)
+	$(call test_amdgpu,rocm,$(CLI_ROCM_PYTORCH_REQS),cli and cuda and pytorch and peft and not bnb and not gptq)

 test_cli_cuda_onnxruntime:
 	$(call test_nvidia,cuda,$(CLI_CUDA_ONNXRUNTIME_REQS),cli and cuda and onnxruntime)
5 changes: 5 additions & 0 deletions setup.py
@@ -70,6 +70,11 @@
     "peft": ["peft"],
     "autoawq": ["autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"],
     "bitsandbytes": ["bitsandbytes"],
+    "auto-gptq-cu118": [
+        "optimum",
+        "auto-gptq@https://huggingface.github.io/autogptq-index/whl/cu118/auto-gptq/auto_gptq-0.7.1%2Bcu118-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+    ],
+    "auto-gptq-cu121": ["optimum", "auto-gptq"],
 }


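Note: two extras appear to be needed because the auto-gptq build served from PyPI presumably targets the default CUDA 12.1 torch, while the cu118 build has to be pinned from the autogptq wheel index via a direct URL. A hypothetical helper (not part of this PR) for picking the matching extra locally:

```python
# Hypothetical helper: pick the extra matching the local torch CUDA build.
# Not part of the PR; it only illustrates why two extras exist.
import torch

cuda = torch.version.cuda or ""  # e.g. "11.8" or "12.1"
extra = "auto-gptq-cu118" if cuda.startswith("11.8") else "auto-gptq-cu121"
print(f'pip install -e ".[{extra}]"')
```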
21 changes: 21 additions & 0 deletions tests/configs/cuda_inference_pytorch_llama_gptq.yaml
@@ -0,0 +1,21 @@
+defaults:
+  - backend: pytorch
+  # order of inheritance, last one overrides previous ones
+  - _base_ # inherits from base config
+  - _inference_ # inherits from inference config
+  - _cuda_ # inherits from cuda config
+  - _self_ # hydra 1.1 compatibility
+
+experiment_name: cuda_inference_pytorch_gptq
+
+backend:
+  model: TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ
+  quantization_config:
+    exllama_config:
+      version: 2
+
+# hydra/cli specific settings
+hydra:
+  sweeper:
+    params:
+      backend.no_weights: false,true
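Note: the checkpoint is already quantized, so the config only overrides which kernels serve the quantized weights. A sketch of roughly what the pytorch backend is expected to do with `quantization_config`, assuming it is forwarded as a transformers `GPTQConfig` (not verified against the backend code):

```python
# Sketch, assuming the backend forwards quantization_config as a GPTQConfig.
from transformers import AutoModelForCausalLM, GPTQConfig

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
    device_map="cuda:0",
    # version: 2 selects the exllamav2 kernels for the prequantized weights
    quantization_config=GPTQConfig(bits=4, exllama_config={"version": 2}),
)
```

The sweeper params then run the experiment twice, once with `backend.no_weights=false` and once with `true`.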