Minor beautification of imports: hoist the function-local vLLM imports into a module-level `vllm_image.imports()` block

modal-labs · Mar 12, 2024 · 4bba53a · 4bba53a
1 parent aab83e5
commit 4bba53a
Showing 1 changed file with 6 additions and 6 deletions.
diff --git a/src/inference.py b/src/inference.py
@@ -9,6 +9,12 @@
 
 N_INFERENCE_GPU = 2
 
+with vllm_image.imports():
+    from vllm.engine.arg_utils import AsyncEngineArgs
+    from vllm.engine.async_llm_engine import AsyncLLMEngine
+    from vllm.sampling_params import SamplingParams
+    from vllm.utils import random_uuid
+
 
 @stub.cls(
     gpu=modal.gpu.H100(count=N_INFERENCE_GPU),
@@ -37,9 +43,6 @@ def init(self):
         model_path = f"{self.run_dir}/{run_name}/{output_dir}/merged"
         print("Initializing vLLM engine on:", model_path)
 
-        from vllm.engine.arg_utils import AsyncEngineArgs
-        from vllm.engine.async_llm_engine import AsyncLLMEngine
-
         engine_args = AsyncEngineArgs(
             model=model_path,
             gpu_memory_utilization=0.95,
@@ -51,9 +54,6 @@ async def _stream(self, input: str):
         if not input:
             return
 
-        from vllm.sampling_params import SamplingParams
-        from vllm.utils import random_uuid
-
         sampling_params = SamplingParams(
             repetition_penalty=1.1,
             temperature=0.2,