fix bugs

huggingface · Oct 14, 2024 · 83c4c9f · 83c4c9f
1 parent ebdc70a
commit 83c4c9f
Show file tree

Hide file tree

Showing 17 changed files with 409 additions and 103 deletions.
diff --git a/.github/workflows/test_cli_cpu_llama_cpp.yaml b/.github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -50,20 +50,4 @@ jobs:
 
       # - name: Run cli tests
       #   run: |
-      #     pytest tests/test_cli.py -s -k "llama_cpp"
-
-      - name: Run tests from example folder
-        run: |
-          pytest tests/test_examples.py -s -k "cpu and llama_cpp"
-        continue-on-error: true  # Continue even if tests fail or no tests are collected
-
-      - name: Check test result
-        if: steps.run_tests.outcome == 'failure' && steps.run_tests.conclusion == 'failure'
-        run: |
-          if [ "${{ steps.run_tests.outputs.exit_code }}" -eq 5 ]; then
-            echo "No tests were collected. Skipping failure..."
-            exit 0
-          else
-            echo "Tests failed for another reason."
-            exit 1
-          fi
+      #     pytest tests/test_cli.py -s -k "llama_cpp"
diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml
@@ -50,20 +50,4 @@ jobs:
 
       # - name: Run cli tests
       #   run: |
-      #     pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
-
-      - name: Run tests from example folder
-        run: |
-          pytest tests/test_examples.py -s -k "cpu and pytorch"
-        continue-on-error: true  # Continue even if tests fail or no tests are collected
-
-      - name: Check test result
-        if: steps.run_tests.outcome == 'failure' && steps.run_tests.conclusion == 'failure'
-        run: |
-          if [ "${{ steps.run_tests.outputs.exit_code }}" -eq 5 ]; then
-            echo "No tests were collected. Skipping failure..."
-            exit 0
-          else
-            echo "Tests failed for another reason."
-            exit 1
-          fi
+      #     pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -49,19 +49,3 @@ jobs:
       # - name: Run cli tests
       #   run: |
       #     pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime"
-
-      - name: Run tests from example folder
-        run: |
-          pytest tests/test_examples.py -x -s -k "cuda and onnxruntime"
-        continue-on-error: true  # Continue even if tests fail or no tests are collected
-
-      - name: Check test result
-        if: steps.run_tests.outcome == 'failure' && steps.run_tests.conclusion == 'failure'
-        run: |
-          if [ "${{ steps.run_tests.outputs.exit_code }}" -eq 5 ]; then
-            echo "No tests were collected. Skipping failure..."
-            exit 0
-          else
-            echo "Tests failed for another reason."
-            exit 1
-          fi
diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml
@@ -81,20 +81,4 @@ jobs:
 
       # - name: Run cli tests
       #   run: |
-      #     pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map)"
-
-      - name: Run tests from example folder
-        run: |
-          pytest tests/test_examples.py -x -s -k "cuda and torch_ort"
-        continue-on-error: true  # Continue even if tests fail or no tests are collected
-
-      - name: Check test result
-        if: steps.run_tests.outcome == 'failure' && steps.run_tests.conclusion == 'failure'
-        run: |
-          if [ "${{ steps.run_tests.outputs.exit_code }}" -eq 5 ]; then
-            echo "No tests were collected. Skipping failure..."
-            exit 0
-          else
-            echo "Tests failed for another reason."
-            exit 1
-          fi
+      #     pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map)"
diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml
@@ -54,10 +54,6 @@ jobs:
       #   run: |
       #     pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb"
 
-      - name: Run tests from example folder
-        run: |
-          pytest tests/test_examples.py -x -s -k "rocm and pytorch"
-
   run_cli_rocm_pytorch_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||

diff --git a/benchmark.json b/benchmark.json
@@ -0,0 +1,166 @@
+{
+    "config": {
+        "name": "tei_bert",
+        "backend": {
+            "name": "py-txi",
+            "version": "0.10.0",
+            "_target_": "optimum_benchmark.backends.py_txi.backend.PyTXIBackend",
+            "task": "feature-extraction",
+            "library": "transformers",
+            "model_type": "bert",
+            "model": "BAAI/bge-base-en-v1.5",
+            "processor": "BAAI/bge-base-en-v1.5",
+            "device": "cpu",
+            "device_ids": null,
+            "seed": 42,
+            "inter_op_num_threads": null,
+            "intra_op_num_threads": null,
+            "model_kwargs": {},
+            "processor_kwargs": {},
+            "no_weights": false,
+            "image": null,
+            "shm_size": "1g",
+            "devices": null,
+            "gpus": null,
+            "ports": {
+                "80/tcp": [
+                    "127.0.0.1",
+                    48029
+                ]
+            },
+            "volumes": {
+                "/home/ubuntu/.cache/huggingface/hub": {
+                    "bind": "/data",
+                    "mode": "rw"
+                }
+            },
+            "environment": [
+                "HUGGING_FACE_HUB_TOKEN"
+            ],
+            "dtype": null,
+            "max_concurrent_requests": 512,
+            "sharded": null,
+            "quantize": null,
+            "num_shard": null,
+            "speculate": null,
+            "cuda_graphs": null,
+            "disable_custom_kernels": null,
+            "trust_remote_code": null,
+            "pooling": null
+        },
+        "scenario": {
+            "name": "inference",
+            "_target_": "optimum_benchmark.scenarios.inference.scenario.InferenceScenario",
+            "iterations": 10,
+            "duration": 10,
+            "warmup_runs": 10,
+            "input_shapes": {
+                "batch_size": 64,
+                "num_choices": 2,
+                "sequence_length": 128
+            },
+            "new_tokens": null,
+            "memory": false,
+            "latency": true,
+            "energy": false,
+            "forward_kwargs": {},
+            "generate_kwargs": {},
+            "call_kwargs": {}
+        },
+        "launcher": {
+            "name": "inline",
+            "_target_": "optimum_benchmark.launchers.inline.launcher.InlineLauncher",
+            "device_isolation": false,
+            "device_isolation_action": null,
+            "numactl": false,
+            "numactl_kwargs": {}
+        },
+        "environment": {
+            "cpu": " Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz",
+            "cpu_count": 8,
+            "cpu_ram_mb": 33164.890112,
+            "system": "Linux",
+            "machine": "x86_64",
+            "platform": "Linux-5.15.0-1064-aws-x86_64-with-glibc2.31",
+            "processor": "x86_64",
+            "python_version": "3.10.14",
+            "gpu": [
+                "Tesla T4"
+            ],
+            "gpu_count": 1,
+            "gpu_vram_mb": 16106127360,
+            "optimum_benchmark_version": "0.5.0.dev0",
+            "optimum_benchmark_commit": "ebdc70a40b3eba1fc5d3c3a469f1884f8f1ffc1c",
+            "transformers_version": "4.45.2",
+            "transformers_commit": "ebdc70a40b3eba1fc5d3c3a469f1884f8f1ffc1c",
+            "accelerate_version": "1.0.0",
+            "accelerate_commit": "ebdc70a40b3eba1fc5d3c3a469f1884f8f1ffc1c",
+            "diffusers_version": null,
+            "diffusers_commit": null,
+            "optimum_version": null,
+            "optimum_commit": null,
+            "timm_version": null,
+            "timm_commit": null,
+            "peft_version": null,
+            "peft_commit": null
+        },
+        "print_report": false,
+        "log_report": true
+    },
+    "report": {
+        "load": {
+            "memory": null,
+            "latency": {
+                "unit": "s",
+                "values": [
+                    15.59886232309509
+                ],
+                "count": 1,
+                "total": 15.59886232309509,
+                "mean": 15.59886232309509,
+                "p50": 15.59886232309509,
+                "p90": 15.59886232309509,
+                "p95": 15.59886232309509,
+                "p99": 15.59886232309509,
+                "stdev": 0,
+                "stdev_": 0
+            },
+            "throughput": null,
+            "energy": null,
+            "efficiency": null
+        },
+        "forward": {
+            "memory": null,
+            "latency": {
+                "unit": "s",
+                "values": [
+                    11.477741526905447,
+                    11.554612639942206,
+                    11.268277329974808,
+                    11.438665975932963,
+                    11.257700112997554,
+                    11.165944762993604,
+                    11.163343374966644,
+                    11.381994054070674,
+                    11.254174266010523,
+                    11.333256351063028
+                ],
+                "count": 10,
+                "total": 113.29571039485745,
+                "mean": 11.329571039485746,
+                "p50": 11.300766840518918,
+                "p90": 11.485428638209124,
+                "p95": 11.520020639075664,
+                "p99": 11.547694239768898,
+                "stdev": 0.12500955626540203,
+                "stdev_": 1.1033917862355032
+            },
+            "throughput": {
+                "unit": "samples/s",
+                "value": 5.648934083819028
+            },
+            "energy": null,
+            "efficiency": null
+        }
+    }
+}
diff --git a/benchmark_config.json b/benchmark_config.json
@@ -0,0 +1,108 @@
+{
+    "name": "tei_bert",
+    "backend": {
+        "name": "py-txi",
+        "version": "0.10.0",
+        "_target_": "optimum_benchmark.backends.py_txi.backend.PyTXIBackend",
+        "task": "feature-extraction",
+        "library": "transformers",
+        "model_type": "bert",
+        "model": "BAAI/bge-base-en-v1.5",
+        "processor": "BAAI/bge-base-en-v1.5",
+        "device": "cpu",
+        "device_ids": null,
+        "seed": 42,
+        "inter_op_num_threads": null,
+        "intra_op_num_threads": null,
+        "model_kwargs": {},
+        "processor_kwargs": {},
+        "no_weights": false,
+        "image": null,
+        "shm_size": "1g",
+        "devices": null,
+        "gpus": null,
+        "ports": {
+            "80/tcp": [
+                "127.0.0.1",
+                0
+            ]
+        },
+        "volumes": {
+            "/home/ubuntu/.cache/huggingface/hub": {
+                "bind": "/data",
+                "mode": "rw"
+            }
+        },
+        "environment": [
+            "HUGGING_FACE_HUB_TOKEN"
+        ],
+        "dtype": null,
+        "max_concurrent_requests": 512,
+        "sharded": null,
+        "quantize": null,
+        "num_shard": null,
+        "speculate": null,
+        "cuda_graphs": null,
+        "disable_custom_kernels": null,
+        "trust_remote_code": null,
+        "pooling": null
+    },
+    "scenario": {
+        "name": "inference",
+        "_target_": "optimum_benchmark.scenarios.inference.scenario.InferenceScenario",
+        "iterations": 10,
+        "duration": 10,
+        "warmup_runs": 10,
+        "input_shapes": {
+            "batch_size": 64,
+            "num_choices": 2,
+            "sequence_length": 128
+        },
+        "new_tokens": null,
+        "memory": false,
+        "latency": true,
+        "energy": false,
+        "forward_kwargs": {},
+        "generate_kwargs": {},
+        "call_kwargs": {}
+    },
+    "launcher": {
+        "name": "inline",
+        "_target_": "optimum_benchmark.launchers.inline.launcher.InlineLauncher",
+        "device_isolation": false,
+        "device_isolation_action": null,
+        "numactl": false,
+        "numactl_kwargs": {}
+    },
+    "environment": {
+        "cpu": " Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz",
+        "cpu_count": 8,
+        "cpu_ram_mb": 33164.890112,
+        "system": "Linux",
+        "machine": "x86_64",
+        "platform": "Linux-5.15.0-1064-aws-x86_64-with-glibc2.31",
+        "processor": "x86_64",
+        "python_version": "3.10.14",
+        "gpu": [
+            "Tesla T4"
+        ],
+        "gpu_count": 1,
+        "gpu_vram_mb": 16106127360,
+        "optimum_benchmark_version": "0.5.0.dev0",
+        "optimum_benchmark_commit": "ebdc70a40b3eba1fc5d3c3a469f1884f8f1ffc1c",
+        "transformers_version": "4.45.2",
+        "transformers_commit": "ebdc70a40b3eba1fc5d3c3a469f1884f8f1ffc1c",
+        "accelerate_version": "1.0.0",
+        "accelerate_commit": "ebdc70a40b3eba1fc5d3c3a469f1884f8f1ffc1c",
+        "diffusers_version": null,
+        "diffusers_commit": null,
+        "optimum_version": null,
+        "optimum_commit": null,
+        "timm_version": null,
+        "timm_commit": null,
+        "peft_version": null,
+        "peft_commit": null
+    },
+    "print_report": false,
+    "log_report": true
+}