diff --git a/tests/configs/cuda_inference_pytorch_llama_gptq.yaml b/tests/configs/cuda_inference_pytorch_llama_gptq.yaml
index 6eabf834..2d0f8e0d 100644
--- a/tests/configs/cuda_inference_pytorch_llama_gptq.yaml
+++ b/tests/configs/cuda_inference_pytorch_llama_gptq.yaml
@@ -1,44 +1,21 @@
 defaults:
-  - backend: pytorch # default backend
-  - launcher: process # default launcher
-  - benchmark: inference # default benchmark
-  - experiment # inheriting experiment schema
-  - _self_ # for hydra 1.1 compatibility
-  - override hydra/job_logging: colorlog # colorful logging
-  - override hydra/hydra_logging: colorlog # colorful logging
+  - backend: pytorch
+  # order of inheritance, last one overrides previous ones
+  - _base_ # inherits from base config
+  - _inference_ # inherits from inference config
+  - _cuda_ # inherits from cuda config
+  - _self_ # hydra 1.1 compatibility
 
-experiment_name: cuda_inference_pytorch_llama_gptq
+experiment_name: cuda_inference_pytorch_gptq
 
 backend:
-  device: cuda
-  device_ids: 0
-  model: TheBloke/Llama-2-7B-GPTQ
+  model: TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ
   quantization_config:
     exllama_config:
       version: 2
 
-launcher:
-  device_isolation: true
-
-benchmark:
-  memory: true
-  latency: true
-
 # hydra/cli specific settings
 hydra:
   sweeper:
     params:
-      backend.no_weights: true,false
-  run:
-    # where to store run results
-    dir: runs/${experiment_name}
-  sweep:
-    # where to store sweep results
-    dir: sweeps/${experiment_name}
-  job:
-    # change working directory to the run directory
-    chdir: true
-  env_set:
-    # set environment variable OVERRIDE_BENCHMARKS to 1
-    # to not skip benchmarks that have been run before
-    OVERRIDE_BENCHMARKS: 1
\ No newline at end of file
+      backend.no_weights: false,true
\ No newline at end of file
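
Note on the new defaults list: Hydra composes the listed configs in order, with later entries overriding earlier ones, and `_self_` placed last so this file's own keys win over everything it inherits. The settings removed here (device selection, launcher isolation, benchmark flags, and the `hydra.run`/`sweep`/`job`/`env_set` boilerplate) are presumably what now lives in the shared `_base_`, `_inference_`, and `_cuda_` configs. A minimal sketch of what such a shared `_cuda_` config might contain, inferred purely from the lines removed above (the file name and exact contents are assumptions, not confirmed by this diff):

# tests/configs/_cuda_.yaml -- hypothetical sketch; the real shared config may differ
backend:
  device: cuda           # moved out of the per-experiment config
  device_ids: 0          # pin the benchmark to the first GPU

launcher:
  device_isolation: true # isolate the benchmarked process on its device

Because `backend.no_weights` sits under `hydra.sweeper.params`, launching this config in Hydra's multirun mode produces one run per listed value; reordering the values to `false,true` only changes which case runs first, not the set of runs.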