Commit 71a6017

update examples

IlyasMoutawwakil committed Oct 31, 2023
1 parent 09435a8 commit 71a6017
Showing 117 changed files with 1,894 additions and 1,227 deletions.
@@ -5,8 +5,8 @@ backend:
   seed: 42
   inter_op_num_threads: null
   intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
+  initial_isolation_check: false
+  continous_isolation_check: false
   delete_cache: false
   no_weights: false
   device_map: null
@@ -18,7 +18,7 @@ backend:
   torch_compile: false
   torch_compile_config: {}
   bettertransformer: false
-  quantization_strategy: bnb
+  quantization_scheme: bnb
   quantization_config:
     load_in_4bit: true
     bnb_4bit_compute_dtype: float16
@@ -31,7 +31,6 @@ benchmark:
   _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
   duration: 10
   warmup_runs: 10
-  benchmark_duration: null
   memory: true
   energy: false
   input_shapes:
@@ -41,14 +40,12 @@ benchmark:
     feature_size: 80
     nb_max_frames: 3000
     audio_sequence_length: 16000
-  new_tokens: null
+  new_tokens: 1000
   can_diffuse: ${can_diffuse:${task}}
-  forward_kwargs: {}
   can_generate: ${can_generate:${task}}
-  generate_kwargs:
-    max_new_tokens: 1000
-    min_new_tokens: 1000
-experiment_name: llama_bnb
+  forward_kwargs: {}
+  generate_kwargs: {}
+experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
 model: meta-llama/Llama-2-7b-hf
 device: cuda
 task: ${infer_task:${model}}
@@ -58,18 +55,18 @@ hub_kwargs:
   force_download: false
   local_files_only: false
 environment:
-  optimum_version: 1.12.1.dev0
-  transformers_version: 4.33.0.dev0
-  accelerate_version: 0.23.0.dev0
-  diffusers_version: 0.21.0.dev0
-  python_version: 3.9.17
+  optimum_version: 1.13.2
+  optimum_commit: null
+  transformers_version: 4.35.0.dev0
+  transformers_commit: null
+  accelerate_version: 0.24.0
+  accelerate_commit: null
+  diffusers_version: null
+  diffusers_commit: null
+  python_version: 3.10.12
   system: Linux
   cpu: ' AMD EPYC 7742 64-Core Processor'
   cpu_count: 128
   cpu_ram_mb: 540684
   gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA DGX Display
   - NVIDIA A100-SXM4-80GB
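
The substantive change in these example configs is the rename of quantization_strategy to quantization_scheme and the trimmed bitsandbytes settings. Pieced together from the hunks above, the updated backend section now reads roughly as follows, a sketch showing only the quantization-related keys:

backend:
  quantization_scheme: bnb           # renamed from quantization_strategy
  quantization_config:               # forwarded to bitsandbytes
    load_in_4bit: true               # quantize weights to 4 bits at load time
    bnb_4bit_compute_dtype: float16  # run compute in fp16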
@@ -1,8 +1,8 @@
 hydra:
   run:
-    dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})
+    dir: experiments/${experiment_name}
   sweep:
-    dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})
+    dir: experiments/${experiment_name}
     subdir: ${hydra.job.num}
   launcher:
     _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
@@ -128,9 +128,10 @@ hydra:
     override_dirname: benchmark.input_shapes.batch_size=1
     id: '0'
     num: 0
-    config_name: llama_bnb
+    config_name: bnb
     env_set:
-      CUDA_VISIBLE_DEVICES: '2'
+      CUDA_VISIBLE_DEVICES: '0'
+      CUDA_DEVICE_ORDER: PCI_BUS_ID
     env_copy: []
     config:
       override_dirname:
@@ -140,7 +141,7 @@ hydra:
   runtime:
     version: 1.3.2
     version_base: '1.3'
-    cwd: /home/ilyas/optimum-benchmark/examples/running-llamas
+    cwd: /workspace/optimum-benchmark/examples/running-llamas
     config_sources:
     - path: hydra.conf
       schema: pkg
@@ -151,13 +152,13 @@ hydra:
     - path: hydra_plugins.hydra_colorlog.conf
       schema: pkg
       provider: hydra-colorlog
-    - path: /home/ilyas/optimum-benchmark/examples/running-llamas/configs
+    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
       schema: file
       provider: command-line
     - path: ''
       schema: structured
       provider: schema
-    output_dir: /home/ilyas/optimum-benchmark/examples/running-llamas/experiments/llama_bnb_batch_size(1)/0
+    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0
     choices:
       benchmark: inference
       backend: pytorch
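
Since experiment_name now encodes the batch size, sequence length, and token budget itself, the old _batch_size(...) suffix on the Hydra directories was redundant and is dropped. For this job the new settings resolve roughly as sketched below (the resolved path is inferred from the output_dir above):

hydra:
  run:
    dir: experiments/${experiment_name}
    # -> experiments/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)
  job:
    env_set:
      CUDA_VISIBLE_DEVICES: '0'      # pin the benchmark to one GPU
      CUDA_DEVICE_ORDER: PCI_BUS_ID  # order devices by bus ID so index 0 is stable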
@@ -1,12 +1,12 @@
 backend:
   name: pytorch
-  version: 2.0.1
+  version: 2.1.0+cu118
   _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
   seed: 42
   inter_op_num_threads: null
   intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
+  initial_isolation_check: false
+  continous_isolation_check: false
   delete_cache: false
   no_weights: false
   device_map: null
@@ -18,11 +18,10 @@ backend:
   torch_compile: false
   torch_compile_config: {}
   bettertransformer: false
-  quantization_strategy: bnb
+  quantization_scheme: bnb
   quantization_config:
     load_in_8bit: false
-    load_in_4bit: true
-    llm_int8_threshold: 0.0
+    load_in_4bit: true
     bnb_4bit_compute_dtype: float16
   use_ddp: false
   ddp_config: {}
@@ -33,7 +32,6 @@ benchmark:
   _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
   duration: 10
   warmup_runs: 10
-  benchmark_duration: null
   memory: true
   energy: false
   input_shapes:
@@ -43,18 +41,18 @@ benchmark:
     feature_size: 80
     nb_max_frames: 3000
     audio_sequence_length: 16000
-  new_tokens: null
+  new_tokens: 1000
   can_diffuse: false
-  forward_kwargs: {}
   can_generate: true
+  forward_kwargs: {}
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 1000
+    min_new_tokens: 1000
     do_sample: false
     use_cache: true
     pad_token_id: 0
     num_beams: 1
-experiment_name: llama_bnb
+experiment_name: bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)
 model: meta-llama/Llama-2-7b-hf
 device: cuda
 task: text-generation
@@ -64,18 +62,18 @@ hub_kwargs:
   force_download: false
   local_files_only: false
 environment:
-  optimum_version: 1.12.1.dev0
-  transformers_version: 4.33.0.dev0
-  accelerate_version: 0.23.0.dev0
-  diffusers_version: 0.21.0.dev0
-  python_version: 3.9.17
+  optimum_version: 1.13.2
+  optimum_commit: null
+  transformers_version: 4.35.0.dev0
+  transformers_commit: null
+  accelerate_version: 0.24.0
+  accelerate_commit: null
+  diffusers_version: null
+  diffusers_commit: null
+  python_version: 3.10.12
   system: Linux
   cpu: ' AMD EPYC 7742 64-Core Processor'
   cpu_count: 128
   cpu_ram_mb: 540684
   gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA DGX Display
   - NVIDIA A100-SXM4-80GB
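
In the resolved config, new_tokens: 1000 pins both bounds of the generation length: with min_new_tokens equal to max_new_tokens and greedy decoding, every run generates exactly 1000 tokens, so generate.throughput(tokens/s) stays comparable across batch sizes. The relevant slice of the config above:

benchmark:
  new_tokens: 1000
  generate_kwargs:
    max_new_tokens: 1000  # upper bound on generated tokens
    min_new_tokens: 1000  # equal to the upper bound, so length is fixed
    do_sample: false      # greedy decoding, no sampling variance
    use_cache: true       # KV cache on, as in normal inference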
@@ -0,0 +1,2 @@
+forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB)
+0.0807,12.4,7614,41.2,24.3,8633
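
These results are internally consistent with the batch_size=1, new_tokens=1000 settings of this run, assuming throughput is derived from the reported latencies:

# forward.throughput  ≈ batch_size / forward.latency  = 1 / 0.0807 ≈ 12.4 samples/s
# generate.throughput ≈ new_tokens / generate.latency = 1000 / 41.2 ≈ 24.3 tokens/s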
@@ -5,8 +5,8 @@ backend:
   seed: 42
   inter_op_num_threads: null
   intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
+  initial_isolation_check: false
+  continous_isolation_check: false
   delete_cache: false
   no_weights: false
   device_map: null
@@ -18,7 +18,7 @@ backend:
   torch_compile: false
   torch_compile_config: {}
   bettertransformer: false
-  quantization_strategy: bnb
+  quantization_scheme: bnb
   quantization_config:
     load_in_4bit: true
     bnb_4bit_compute_dtype: float16
@@ -31,7 +31,6 @@ benchmark:
   _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
   duration: 10
   warmup_runs: 10
-  benchmark_duration: null
   memory: true
   energy: false
   input_shapes:
@@ -41,14 +40,12 @@ benchmark:
     feature_size: 80
     nb_max_frames: 3000
     audio_sequence_length: 16000
-  new_tokens: null
+  new_tokens: 1000
   can_diffuse: ${can_diffuse:${task}}
-  forward_kwargs: {}
   can_generate: ${can_generate:${task}}
-  generate_kwargs:
-    max_new_tokens: 1000
-    min_new_tokens: 1000
-experiment_name: llama_bnb
+  forward_kwargs: {}
+  generate_kwargs: {}
+experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
 model: meta-llama/Llama-2-7b-hf
 device: cuda
 task: ${infer_task:${model}}
@@ -58,18 +55,18 @@ hub_kwargs:
   force_download: false
   local_files_only: false
 environment:
-  optimum_version: 1.12.1.dev0
-  transformers_version: 4.33.0.dev0
-  accelerate_version: 0.23.0.dev0
-  diffusers_version: 0.21.0.dev0
-  python_version: 3.9.17
+  optimum_version: 1.13.2
+  optimum_commit: null
+  transformers_version: 4.35.0.dev0
+  transformers_commit: null
+  accelerate_version: 0.24.0
+  accelerate_commit: null
+  diffusers_version: null
+  diffusers_commit: null
+  python_version: 3.10.12
   system: Linux
   cpu: ' AMD EPYC 7742 64-Core Processor'
   cpu_count: 128
   cpu_ram_mb: 540684
   gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA DGX Display
   - NVIDIA A100-SXM4-80GB
@@ -1,8 +1,8 @@
 hydra:
   run:
-    dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})
+    dir: experiments/${experiment_name}
   sweep:
-    dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})
+    dir: experiments/${experiment_name}
     subdir: ${hydra.job.num}
   launcher:
     _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
@@ -128,9 +128,10 @@ hydra:
     override_dirname: benchmark.input_shapes.batch_size=16
     id: '4'
     num: 4
-    config_name: llama_bnb
+    config_name: bnb
    env_set:
-      CUDA_VISIBLE_DEVICES: '2'
+      CUDA_VISIBLE_DEVICES: '0'
+      CUDA_DEVICE_ORDER: PCI_BUS_ID
     env_copy: []
     config:
       override_dirname:
@@ -140,7 +141,7 @@ hydra:
   runtime:
     version: 1.3.2
     version_base: '1.3'
-    cwd: /home/ilyas/optimum-benchmark/examples/running-llamas
+    cwd: /workspace/optimum-benchmark/examples/running-llamas
     config_sources:
     - path: hydra.conf
       schema: pkg
@@ -151,13 +152,13 @@ hydra:
     - path: hydra_plugins.hydra_colorlog.conf
       schema: pkg
       provider: hydra-colorlog
-    - path: /home/ilyas/optimum-benchmark/examples/running-llamas/configs
+    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
       schema: file
       provider: command-line
    - path: ''
       schema: structured
       provider: schema
-    output_dir: /home/ilyas/optimum-benchmark/examples/running-llamas/experiments/llama_bnb_batch_size(16)/4
+    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4
     choices:
       benchmark: inference
       backend: pytorch
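
The two hydra.yaml files shown are jobs 0 (batch_size=1) and 4 (batch_size=16) of the same multirun; the intermediate jobs are among the other changed files. A sweep definition like the following would be consistent with those job numbers, though the exact values are an assumption, since only batch sizes 1 and 16 are visible in this excerpt:

hydra:
  sweeper:
    params:
      # assumed sweep values; jobs 0 and 4 would correspond to 1 and 16
      benchmark.input_shapes.batch_size: 1,2,4,8,16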