diff --git a/alpa/device_mesh.py b/alpa/device_mesh.py
index 62bf2aeae..bf0acf1c4 100644
--- a/alpa/device_mesh.py
+++ b/alpa/device_mesh.py
@@ -1513,9 +1513,9 @@ class DistributedArray:
     a normal numpy array.
 
     Internally, it stores a pointer to all remote buffers.
-    The buffers are stored distributedly on remote workers' device memeory.
+    The buffers are stored distributedly on remote workers' device memory.
     When users require the value of the array. These buffers will be gathered
-    to the dirver.
+    to the driver.
     """
 
     def __init__(self,
diff --git a/alpa/pipeline_parallel/pipeshard_executable.py b/alpa/pipeline_parallel/pipeshard_executable.py
index aef5c9f4e..b3b566021 100644
--- a/alpa/pipeline_parallel/pipeshard_executable.py
+++ b/alpa/pipeline_parallel/pipeshard_executable.py
@@ -1,4 +1,4 @@
-"""The dirver part and worker part of a pipeshard executable."""
+"""The driver part and worker part of a pipeshard executable."""
 import logging
 from functools import partial
 import json
diff --git a/benchmark/alpa/benchmark_parallel_utils.py b/benchmark/alpa/benchmark_parallel_utils.py
index 074d5eea0..b0a46d757 100644
--- a/benchmark/alpa/benchmark_parallel_utils.py
+++ b/benchmark/alpa/benchmark_parallel_utils.py
@@ -236,7 +236,7 @@ def benchmark_training_executable(niter,
         executable.sync()
         e2e_latency = (time.time() - tic) / niter
         latencies = [e2e_latency]
-        print(f"latency with dirver overhead: {e2e_latency:.3f}")
+        print(f"latency with driver overhead: {e2e_latency:.3f}")
     else:
         # Benchmark latency without driver overhead
         for i in range(niter):