Fix issue ray-project#63: only launch a new request while iter <= max_num_completed_requests

huangdi614 · Aug 9, 2024 · 3672188 · 3672188
1 parent 03872a4
commit 3672188
Showing 1 changed file with 11 additions and 9 deletions.
diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py
@@ -95,15 +95,17 @@ def get_token_throughput_latencies(
     ):
         iter += 1
 
-        default_sampling_params = {"max_tokens": num_output_tokens_list.pop()}
-        default_sampling_params.update(additional_sampling_params)
-        request_config = RequestConfig(
-            model=model,
-            prompt=prompts.pop(),
-            sampling_params=default_sampling_params,
-            llm_api=llm_api,
-        )
-        req_launcher.launch_requests(request_config)
+        if iter <= max_num_completed_requests:
+            default_sampling_params = {"max_tokens": num_output_tokens_list.pop()}
+            default_sampling_params.update(additional_sampling_params)
+            request_config = RequestConfig(
+                model=model,
+                prompt=prompts.pop(),
+                sampling_params=default_sampling_params,
+                llm_api=llm_api,
+            )
+            req_launcher.launch_requests(request_config)
+
         # Retrieving results less frequently allows for more concurrent requests
         # to be launched. This will overall reduce the amount of time it takes
         # for the test to run.