diff --git a/tests/configs/cuda_inference_pytorch_llama_gptq.yaml b/tests/configs/cuda_inference_pytorch_llama_gptq.yaml
index 6eabf834..2d0f8e0d 100644
--- a/tests/configs/cuda_inference_pytorch_llama_gptq.yaml
+++ b/tests/configs/cuda_inference_pytorch_llama_gptq.yaml
@@ -1,44 +1,21 @@
 defaults:
-  - backend: pytorch # default backend
-  - launcher: process # default launcher
-  - benchmark: inference # default benchmark
-  - experiment # inheriting experiment schema
-  - _self_ # for hydra 1.1 compatibility
-  - override hydra/job_logging: colorlog # colorful logging
-  - override hydra/hydra_logging: colorlog # colorful logging
+  - backend: pytorch
+  # order of inheritance, last one overrides previous ones
+  - _base_ # inherits from base config
+  - _inference_ # inherits from inference config
+  - _cuda_ # inherits from cuda config
+  - _self_ # hydra 1.1 compatibility
 
-experiment_name: cuda_inference_pytorch_llama_gptq
+experiment_name: cuda_inference_pytorch_gptq
 
 backend:
-  device: cuda
-  device_ids: 0
-  model: TheBloke/Llama-2-7B-GPTQ
+  model: TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ
   quantization_config:
     exllama_config:
       version: 2
 
-launcher:
-  device_isolation: true
-
-benchmark:
-  memory: true
-  latency: true
-
 # hydra/cli specific settings
 hydra:
   sweeper:
     params:
-      backend.no_weights: true,false
-  run:
-    # where to store run results
-    dir: runs/${experiment_name}
-  sweep:
-    # where to store sweep results
-    dir: sweeps/${experiment_name}
-  job:
-    # change working directory to the run directory
-    chdir: true
-  env_set:
-    # set environment variable OVERRIDE_BENCHMARKS to 1
-    # to not skip benchmarks that have been run before
-    OVERRIDE_BENCHMARKS: 1
\ No newline at end of file
+      backend.no_weights: false,true
\ No newline at end of file
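
Note on the new defaults list: Hydra composes the listed configs in order, with later entries overriding earlier ones, and `_self_` placed last so this file's own keys win over everything it inherits. The settings removed here (device selection, launcher isolation, benchmark flags, and the `hydra.run`/`sweep`/`job`/`env_set` boilerplate) are presumably what now lives in the shared `_base_`, `_inference_`, and `_cuda_` configs. A minimal sketch of what such a shared `_cuda_` config might contain, inferred purely from the lines removed above (the file name and exact contents are assumptions, not confirmed by this diff):

# tests/configs/_cuda_.yaml -- hypothetical sketch; the real shared config may differ
backend:
  device: cuda           # moved out of the per-experiment config
  device_ids: 0          # pin the benchmark to the first GPU

launcher:
  device_isolation: true # isolate the benchmarked process on its device

Because `backend.no_weights` sits under `hydra.sweeper.params`, launching this config in Hydra's multirun mode produces one run per listed value; reordering the values to `false,true` only changes which case runs first, not the set of runs.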