From f2998fb7bbdc52b8e5ceb56ffe9489a7e0df62ff Mon Sep 17 00:00:00 2001
From: baptiste
Date: Mon, 14 Oct 2024 12:33:17 +0000
Subject: [PATCH] remove code linked to llm-perf leaderboard

---
 .../update_llm_perf_cpu_pytorch.yaml     |  52 -----
 .../update_llm_perf_cuda_pytorch.yaml    |  57 ------
 .../update_llm_perf_leaderboard.yaml     |  36 ----
 llm_perf/__init__.py                     |   0
 llm_perf/update_llm_perf_cpu_pytorch.py  | 147 --------------
 llm_perf/update_llm_perf_cuda_pytorch.py | 186 ------------------
 llm_perf/update_llm_perf_leaderboard.py  |  73 -------
 llm_perf/utils.py                        | 137 -------------
 8 files changed, 688 deletions(-)
 delete mode 100644 .github/workflows/update_llm_perf_cpu_pytorch.yaml
 delete mode 100644 .github/workflows/update_llm_perf_cuda_pytorch.yaml
 delete mode 100644 .github/workflows/update_llm_perf_leaderboard.yaml
 delete mode 100644 llm_perf/__init__.py
 delete mode 100644 llm_perf/update_llm_perf_cpu_pytorch.py
 delete mode 100644 llm_perf/update_llm_perf_cuda_pytorch.py
 delete mode 100644 llm_perf/update_llm_perf_leaderboard.py
 delete mode 100644 llm_perf/utils.py

diff --git a/.github/workflows/update_llm_perf_cpu_pytorch.yaml b/.github/workflows/update_llm_perf_cpu_pytorch.yaml
deleted file mode 100644
index cdec51c4d..000000000
--- a/.github/workflows/update_llm_perf_cpu_pytorch.yaml
+++ /dev/null
@@ -1,52 +0,0 @@
-name: Update LLM Perf Benchmarks - Intel PyTorch
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: "0 0 * * *"
-
-concurrency:
-  cancel-in-progress: true
-  group: ${{ github.workflow }}-${{ github.ref }}
-
-env:
-  IMAGE: ghcr.io/huggingface/optimum-benchmark:latest-cpu
-
-jobs:
-  run_benchmarks:
-    strategy:
-      fail-fast: false
-      matrix:
-        subset: [unquantized]
-        machine:
-          [{ name: 32vCPU-C7i, runs-on: { group: "aws-c7i-8xlarge-plus" } }]
-
-    runs-on: ${{ matrix.machine.runs-on }}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Run benchmarks
-        uses: addnab/docker-run-action@v3
-        env:
-          SUBSET: ${{ matrix.subset }}
-          MACHINE: ${{ matrix.machine.name }}
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        with:
-          image: ${{ env.IMAGE }}
-          options: |
-            --rm
-            --shm-size 64G
-            --env SUBSET
-            --env MACHINE
-            --env HF_TOKEN
-            --env MKL_THREADING_LAYER=GNU
-            --env HF_HUB_ENABLE_HF_TRANSFER=1
-            --volume ${{ github.workspace }}:/workspace
-            --workdir /workspace
-          run: |
-            pip install packaging && pip install einops scipy optimum codecarbon
-            pip install -U transformers huggingface_hub[hf_transfer]
-            pip install -e .
- python llm_perf/update_llm_perf_cpu_pytorch.py diff --git a/.github/workflows/update_llm_perf_cuda_pytorch.yaml b/.github/workflows/update_llm_perf_cuda_pytorch.yaml deleted file mode 100644 index 7c902b8c3..000000000 --- a/.github/workflows/update_llm_perf_cuda_pytorch.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: Update LLM Perf Benchmarks - CUDA PyTorch - -on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - -concurrency: - cancel-in-progress: true - group: ${{ github.workflow }}-${{ github.ref }} - -env: - IMAGE: ghcr.io/huggingface/optimum-benchmark:latest-cuda - -jobs: - run_benchmarks: - strategy: - fail-fast: false - matrix: - subset: [unquantized, bnb, awq, gptq] - - machine: - [ - { name: 1xA10, runs-on: { group: "aws-g5-4xlarge-plus" } }, - { name: 1xT4, runs-on: { group: "aws-g4dn-2xlarge" } }, - ] - - runs-on: ${{ matrix.machine.runs-on }} - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Run benchmarks - uses: addnab/docker-run-action@v3 - env: - SUBSET: ${{ matrix.subset }} - MACHINE: ${{ matrix.machine.name }} - HF_TOKEN: ${{ secrets.HF_TOKEN }} - with: - image: ${{ env.IMAGE }} - options: | - --rm - --gpus all - --shm-size 64G - --env SUBSET - --env MACHINE - --env HF_TOKEN - --env MKL_THREADING_LAYER=GNU - --env HF_HUB_ENABLE_HF_TRANSFER=1 - --volume ${{ github.workspace }}:/workspace - --workdir /workspace - run: | - pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon - pip install -U transformers huggingface_hub[hf_transfer] - pip install -e . - python llm_perf/update_llm_perf_cuda_pytorch.py diff --git a/.github/workflows/update_llm_perf_leaderboard.yaml b/.github/workflows/update_llm_perf_leaderboard.yaml deleted file mode 100644 index 10ed80c98..000000000 --- a/.github/workflows/update_llm_perf_leaderboard.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: Update LLM Perf Leaderboard - -on: - workflow_dispatch: - schedule: - - cron: "0 */6 * * *" - -concurrency: - cancel-in-progress: true - group: ${{ github.workflow }}-${{ github.ref }} - -jobs: - update_llm_perf_leaderboard: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - - name: Install requirements - run: | - pip install --upgrade pip - pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu - pip install pandas huggingface_hub[hf_transfer] - pip install . 
- - - name: Update Open LLM Data - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - HF_HUB_ENABLE_HF_TRANSFER: 1 - run: | - python llm_perf/update_llm_perf_leaderboard.py diff --git a/llm_perf/__init__.py b/llm_perf/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/llm_perf/update_llm_perf_cpu_pytorch.py b/llm_perf/update_llm_perf_cpu_pytorch.py deleted file mode 100644 index 250355505..000000000 --- a/llm_perf/update_llm_perf_cpu_pytorch.py +++ /dev/null @@ -1,147 +0,0 @@ -import os -import traceback -from itertools import product -from logging import getLogger - -from llm_perf.utils import ( - CANONICAL_PRETRAINED_OPEN_LLM_LIST, - GENERATE_KWARGS, - INPUT_SHAPES, - OPEN_LLM_LIST, - PRETRAINED_OPEN_LLM_LIST, - is_benchmark_conducted, -) -from optimum_benchmark import ( - Benchmark, - BenchmarkConfig, - BenchmarkReport, - InferenceConfig, - ProcessConfig, - PyTorchConfig, -) -from optimum_benchmark.logging_utils import setup_logging - -SUBSET = os.getenv("SUBSET", None) -MACHINE = os.getenv("MACHINE", None) -BACKEND = "pytorch" -HARDWARE = "cpu" - -if os.getenv("MACHINE", None) is None and os.getenv("SUBSET", None) is None: - PUSH_REPO_ID = f"optimum-benchmark/llm-perf-{BACKEND}-{HARDWARE}-debug" - CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"] # noqa: F811 - SUBSET = "unquantized" -elif os.getenv("MACHINE", None) is not None and os.getenv("SUBSET", None) is not None: - PUSH_REPO_ID = f"optimum-benchmark/llm-perf-{BACKEND}-{HARDWARE}-{SUBSET}-{MACHINE}" -else: - raise ValueError("Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging") - -ATTENTION_CONFIGS = ["eager", "sdpa"] - - -if SUBSET == "unquantized": - WEIGHTS_CONFIGS = { - # unquantized - "float32": {"torch_dtype": "float32", "quant_scheme": None, "quant_config": {}}, - "float16": {"torch_dtype": "float16", "quant_scheme": None, "quant_config": {}}, - "bfloat16": {"torch_dtype": "bfloat16", "quant_scheme": None, "quant_config": {}}, - } -else: - raise ValueError(f"Subset {SUBSET} not supported") - - -LOGGER = getLogger("llm-perf-backend") -LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}") -LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}") -LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}") - - -def is_benchmark_supported(weights_config, attn_implementation, hardware): - if attn_implementation == "flash_attention_2": - return False - - return True - - -def benchmark_cpu_pytorch(model, attn_implementation, weights_config): - benchmark_name = f"{weights_config}-{attn_implementation}-{BACKEND}" - subfolder = f"{benchmark_name}/{model.replace('/', '--')}" - - torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"] - quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"] - quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"] - - if not is_benchmark_supported(weights_config, attn_implementation, HARDWARE): - LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported") - return - - if is_benchmark_conducted(PUSH_REPO_ID, subfolder): - LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted") - return - - launcher_config = ProcessConfig() - scenario_config = InferenceConfig( - memory=True, - energy=True, - latency=True, - duration=10, - iterations=10, - warmup_runs=10, - input_shapes=INPUT_SHAPES, - generate_kwargs=GENERATE_KWARGS, - ) - - backend_config = PyTorchConfig( - model=model, - device="cpu", - 
no_weights=True, - library="transformers", - task="text-generation", - torch_dtype=torch_dtype, - quantization_scheme=quant_scheme, - quantization_config=quant_config, - attn_implementation=attn_implementation, - model_kwargs={"trust_remote_code": True}, - ) - - benchmark_config = BenchmarkConfig( - name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config - ) - - benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - - try: - LOGGER.info(f"Running benchmark {benchmark_name} with model {model}") - benchmark_report = Benchmark.launch(benchmark_config) - benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - benchmark = Benchmark(config=benchmark_config, report=benchmark_report) - benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - - except Exception: - LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}") - benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()}) - benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - benchmark = Benchmark(config=benchmark_config, report=benchmark_report) - benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - - -if __name__ == "__main__": - # for isolated process - os.environ["LOG_TO_FILE"] = "0" - os.environ["LOG_LEVEL"] = "INFO" - - # for main process - setup_logging(level="INFO", prefix="MAIN-PROCESS") - - models_attentions_weights = list( - product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_CONFIGS, WEIGHTS_CONFIGS.keys()) - ) - - LOGGER.info( - f"Running a total of {len(models_attentions_weights)} benchmarks, " - f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, " - f"{len(ATTENTION_CONFIGS)} attentions implementations " - f"and {len(WEIGHTS_CONFIGS)} weights configurations." 
- ) - - for model, attn_implementation, weights_config in models_attentions_weights: - benchmark_cpu_pytorch(model, attn_implementation, weights_config) diff --git a/llm_perf/update_llm_perf_cuda_pytorch.py b/llm_perf/update_llm_perf_cuda_pytorch.py deleted file mode 100644 index 98914f6ad..000000000 --- a/llm_perf/update_llm_perf_cuda_pytorch.py +++ /dev/null @@ -1,186 +0,0 @@ -import os -import traceback -from itertools import product -from logging import getLogger - -from llm_perf.utils import ( - CANONICAL_PRETRAINED_OPEN_LLM_LIST, - GENERATE_KWARGS, - INPUT_SHAPES, - OPEN_LLM_LIST, - PRETRAINED_OPEN_LLM_LIST, - is_benchmark_conducted, -) -from optimum_benchmark import Benchmark, BenchmarkConfig, BenchmarkReport, InferenceConfig, ProcessConfig, PyTorchConfig -from optimum_benchmark.logging_utils import setup_logging - -SUBSET = os.getenv("SUBSET", None) -MACHINE = os.getenv("MACHINE", None) - -if os.getenv("MACHINE", None) is None and os.getenv("SUBSET", None) is None: - PUSH_REPO_ID = "optimum-benchmark/llm-perf-pytorch-cuda-debug" - CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"] # noqa: F811 - SUBSET = "unquantized" -elif os.getenv("MACHINE", None) is not None and os.getenv("SUBSET", None) is not None: - PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}" -else: - raise ValueError("Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging") - -ATTENTION_CONFIGS = ["eager", "sdpa", "flash_attention_2"] -if SUBSET == "unquantized": - WEIGHTS_CONFIGS = { - # unquantized - "float32": {"torch_dtype": "float32", "quant_scheme": None, "quant_config": {}}, - "float16": {"torch_dtype": "float16", "quant_scheme": None, "quant_config": {}}, - "bfloat16": {"torch_dtype": "bfloat16", "quant_scheme": None, "quant_config": {}}, - } -elif SUBSET == "bnb": - WEIGHTS_CONFIGS = { - # bnb - "4bit-bnb": {"torch_dtype": "float16", "quant_scheme": "bnb", "quant_config": {"load_in_4bit": True}}, - "8bit-bnb": {"torch_dtype": "float16", "quant_scheme": "bnb", "quant_config": {"load_in_8bit": True}}, - } -elif SUBSET == "gptq": - WEIGHTS_CONFIGS = { - # gptq - "4bit-gptq-exllama-v1": { - "quant_scheme": "gptq", - "torch_dtype": "float16", - "quant_config": {"bits": 4, "use_exllama ": True, "version": 1, "model_seqlen": 256}, - }, - "4bit-gptq-exllama-v2": { - "torch_dtype": "float16", - "quant_scheme": "gptq", - "quant_config": {"bits": 4, "use_exllama ": True, "version": 2, "model_seqlen": 256}, - }, - } -elif SUBSET == "awq": - WEIGHTS_CONFIGS = { - # awq - "4bit-awq-gemm": { - "torch_dtype": "float16", - "quant_scheme": "awq", - "quant_config": {"bits": 4, "version": "gemm"}, - }, - "4bit-awq-gemv": { - "torch_dtype": "float16", - "quant_scheme": "awq", - "quant_config": {"bits": 4, "version": "gemv"}, - }, - "4bit-awq-exllama-v1": { - "torch_dtype": "float16", - "quant_scheme": "awq", - "quant_config": { - "bits": 4, - "version": "exllama", - "exllama_config": {"version": 1, "max_input_len": 64, "max_batch_size": 1}, - }, - }, - "4bit-awq-exllama-v2": { - "torch_dtype": "float16", - "quant_scheme": "awq", - "quant_config": { - "bits": 4, - "version": "exllama", - "exllama_config": {"version": 2, "max_input_len": 64, "max_batch_size": 1}, - }, - }, - } - - -LOGGER = getLogger("llm-perf-backend") -LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}") -LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}") -LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}") - - -def 
is_benchmark_supported(weights_config, attn_implementation): - if attn_implementation == "flash_attention_2" and weights_config == "float32": - return False - - return True - - -def benchmark_cuda_pytorch(model, attn_implementation, weights_config): - benchmark_name = f"{weights_config}-{attn_implementation}" - subfolder = f"{benchmark_name}/{model.replace('/', '--')}" - - torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"] - quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"] - quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"] - - if not is_benchmark_supported(weights_config, attn_implementation): - LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported") - return - - if is_benchmark_conducted(PUSH_REPO_ID, subfolder): - LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted") - return - - launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill") - scenario_config = InferenceConfig( - memory=True, - energy=True, - latency=True, - duration=10, - iterations=10, - warmup_runs=10, - input_shapes=INPUT_SHAPES, - generate_kwargs=GENERATE_KWARGS, - ) - backend_config = PyTorchConfig( - model=model, - device="cuda", - device_ids="0", - no_weights=True, - library="transformers", - task="text-generation", - torch_dtype=torch_dtype, - quantization_scheme=quant_scheme, - quantization_config=quant_config, - attn_implementation=attn_implementation, - model_kwargs={"trust_remote_code": True}, - ) - - benchmark_config = BenchmarkConfig( - name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config - ) - - benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - - try: - LOGGER.info(f"Running benchmark {benchmark_name} with model {model}") - benchmark_report = Benchmark.launch(benchmark_config) - benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - benchmark = Benchmark(config=benchmark_config, report=benchmark_report) - benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - - except Exception: - LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}") - benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()}) - benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - benchmark = Benchmark(config=benchmark_config, report=benchmark_report) - benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True) - - -if __name__ == "__main__": - # for isolated process - os.environ["LOG_TO_FILE"] = "0" - os.environ["LOG_LEVEL"] = "INFO" - - # for main process - setup_logging(level="INFO", prefix="MAIN-PROCESS") - - models_attentions_weights = list( - product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_CONFIGS, WEIGHTS_CONFIGS.keys()) - ) - - LOGGER.info( - f"Running a total of {len(models_attentions_weights)} benchmarks, " - f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, " - f"{len(ATTENTION_CONFIGS)} attentions implementations " - f"and {len(WEIGHTS_CONFIGS)} weights configurations." 
- ) - - for model, attn_implementation, weights_config in models_attentions_weights: - benchmark_cuda_pytorch(model, attn_implementation, weights_config) diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py deleted file mode 100644 index 4516750ae..000000000 --- a/llm_perf/update_llm_perf_leaderboard.py +++ /dev/null @@ -1,73 +0,0 @@ -import subprocess -from glob import glob - -import pandas as pd -from huggingface_hub import create_repo, snapshot_download, upload_file -from tqdm import tqdm - -from optimum_benchmark import Benchmark - -REPO_TYPE = "dataset" -MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard" -PERF_REPO_ID = "optimum-benchmark/llm-perf-{backend}-{hardware}-{subset}-{machine}" - -PERF_DF = "perf-df-{subset}-{machine}.csv" -LLM_DF = "llm-df.csv" - - -def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str): - """ - Gather the benchmarks for a given machine - """ - perf_repo_id = PERF_REPO_ID.format(subset=subset, machine=machine, backend=backend, hardware=hardware) - snapshot = snapshot_download(repo_type=REPO_TYPE, repo_id=perf_repo_id, allow_patterns=["**/benchmark.json"]) - - dfs = [] - for file in tqdm(glob(f"{snapshot}/**/benchmark.json", recursive=True)): - dfs.append(Benchmark.from_json(file).to_dataframe()) - benchmarks = pd.concat(dfs, ignore_index=True) - - perf_df = PERF_DF.format(subset=subset, machine=machine) - benchmarks.to_csv(perf_df, index=False) - create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, private=False, exist_ok=True) - upload_file(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, path_in_repo=perf_df, path_or_fileobj=perf_df) - - -def update_perf_dfs(): - """ - Update the performance dataframes for all machines - """ - for machine in ["1xA10", "1xA100", "1xT4", "32vCPU-C7i"]: - for backend in ["pytorch"]: - for hardware in ["cuda", "cpu"]: - for subset in ["unquantized", "bnb", "awq", "gptq"]: - try: - gather_benchmarks(subset, machine, backend, hardware) - except Exception: - print( - f"benchmark for subset: {subset}, machine: {machine}, backend: {backend}, hardware: {hardware} not found" - ) - - -scrapping_script = """ -git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git -pip install -r scrape-open-llm-leaderboard/requirements.txt -python scrape-open-llm-leaderboard/main.py -rm -rf scrape-open-llm-leaderboard -""" - - -def update_llm_df(): - """ - Scrape the open-llm-leaderboard and update the leaderboard dataframe - """ - subprocess.run(scrapping_script, shell=True) - create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False) - upload_file( - repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, path_in_repo=LLM_DF, path_or_fileobj="open-llm-leaderboard.csv" - ) - - -if __name__ == "__main__": - update_llm_df() - update_perf_dfs() diff --git a/llm_perf/utils.py b/llm_perf/utils.py deleted file mode 100644 index 6a5584284..000000000 --- a/llm_perf/utils.py +++ /dev/null @@ -1,137 +0,0 @@ -import pandas as pd - -from optimum_benchmark.benchmark.report import BenchmarkReport - -INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256} -GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64} - - -OPEN_LLM_LEADERBOARD = pd.read_csv("hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv") -OPEN_LLM_LIST = OPEN_LLM_LEADERBOARD.drop_duplicates(subset=["Model"])["Model"].tolist() -PRETRAINED_OPEN_LLM_LIST = ( - OPEN_LLM_LEADERBOARD[OPEN_LLM_LEADERBOARD["Type"] == "pretrained"] - .drop_duplicates(subset=["Model"])["Model"] - .tolist() -) -# 
CANONICAL_ORGANIZATIONS = [ -# # big companies -# *["google", "facebook", "meta", "meta-llama", "microsoft", "Intel", "TencentARC", "Salesforce"], -# # collectives -# *["EleutherAI", "tiiuae", "NousResearch", "Open-Orca"], -# # HF related -# ["bigcode", "HuggingFaceH4", "huggyllama"], -# # community members -# ["teknium"], -# # startups -# *[ -# "mistral-community", -# "openai-community", -# "togethercomputer", -# "stabilityai", -# "CohereForAI", -# "databricks", -# "mistralai", -# "internlm", -# "Upstage", -# "xai-org", -# "Phind", -# "01-ai", -# "Deci", -# "Qwen", -# ], -# ] -# CANONICAL_PRETRAINED_OPEN_LLM_LIST = [ -# model for model in PRETRAINED_OPEN_LLM_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS -# ] -CANONICAL_PRETRAINED_OPEN_LLM_LIST = [ - "01-ai/Yi-6B", - "01-ai/Yi-34B", - "Deci/DeciLM-7B", - "Deci/DeciCoder-1b", - "EleutherAI/gpt-j-6b", - "EleutherAI/gpt-neo-1.3B", - "EleutherAI/gpt-neo-125m", - "EleutherAI/gpt-neo-2.7B", - "EleutherAI/gpt-neox-20b", - "EleutherAI/polyglot-ko-12.8b", - "EleutherAI/pythia-1.3b", - "EleutherAI/pythia-1.4b", - "EleutherAI/pythia-12b", - "EleutherAI/pythia-160m", - "EleutherAI/pythia-2.7b", - "EleutherAI/pythia-410m", - "EleutherAI/pythia-6.7b", - "EleutherAI/pythia-70m", - "Qwen/Qwen-7B", - "Qwen/Qwen-14B", - "Qwen/Qwen-72B", - "Qwen/Qwen1.5-0.5B", - "Qwen/Qwen1.5-1.8B", - "Qwen/Qwen1.5-4B", - "Qwen/Qwen1.5-7B", - "Qwen/Qwen1.5-14B", - "Qwen/Qwen1.5-32B", - "Qwen/Qwen1.5-72B", - "Qwen/Qwen1.5-110B", - "Qwen/Qwen1.5-MoE-A2.7B", - "Qwen/Qwen2-beta-14B", - "Qwen/Qwen2-beta-72B", - "Salesforce/codegen-6B-nl", - "Salesforce/codegen-16B-nl", - "TencentARC/Mistral_Pro_8B_v0.1", - "databricks/dbrx-base", - "facebook/opt-125m", - "facebook/opt-350m", - "facebook/opt-2.7b", - "facebook/opt-6.7b", - "facebook/opt-13b", - "facebook/opt-30b", - "facebook/opt-66b", - "facebook/xglm-564M", - "facebook/xglm-4.5B", - "facebook/xglm-7.5B", - "google/gemma-2b", - "google/gemma-7b", - "google/recurrentgemma-2b", - "google/recurrentgemma-9b", - "internlm/internlm-20b", - "internlm/internlm2-20b", - "huggyllama/llama-7b", - "huggyllama/llama-13b", - "huggyllama/llama-30b", - "huggyllama/llama-65b", - "meta-llama/Llama-2-7b-hf", - "meta-llama/Llama-2-13b-hf", - "meta-llama/Llama-2-70b-hf", - "meta-llama/Meta-Llama-3-8B", - "meta-llama/Meta-Llama-3-70B", - "microsoft/phi-1_5", - "microsoft/rho-math-1b-v0.1", - "mistralai/Mistral-7B-v0.1", - "mistralai/Mixtral-8x7B-v0.1", - "mistralai/Mixtral-8x22B-v0.1", - "openai-community/gpt2", - "openai-community/gpt2-large", - "stabilityai/stablelm-3b-4e1t", - "stabilityai/stablelm-2-1_6b", - "stabilityai/stablelm-2-12b", - "stabilityai/stablelm-base-alpha-3b", - "stabilityai/stablelm-base-alpha-7b", - "tiiuae/falcon-rw-1b", - "tiiuae/falcon-7b", - "tiiuae/falcon-40b", - "tiiuae/falcon-180B", - "togethercomputer/RedPajama-INCITE-Base-3B-v1", - "togethercomputer/RedPajama-INCITE-Base-7B-v0.1", -] - - -def is_benchmark_conducted(push_repo_id, subfolder): - try: - report = BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder) - if "traceback" in report.to_dict(): - return False - else: - return True - except Exception: - return False