From 8c3035e5447aa412a64b90cf8e3f7bc25fa327b0 Mon Sep 17 00:00:00 2001
From: Matt Watson
Date: Mon, 21 Aug 2023 15:51:58 -0700
Subject: [PATCH] Fix typos

---
 keras_nlp/layers/modeling/masked_lm_head.py              | 2 +-
 keras_nlp/layers/modeling/rotary_embedding_test.py       | 2 +-
 keras_nlp/layers/modeling/sine_position_encoding_test.py | 2 +-
 keras_nlp/layers/preprocessing/multi_segment_packer.py   | 2 +-
 keras_nlp/models/albert/albert_classifier.py             | 2 +-
 keras_nlp/models/albert/albert_masked_lm.py              | 2 +-
 keras_nlp/models/backbone.py                             | 2 +-
 keras_nlp/models/bart/bart_seq_2_seq_lm_preprocessor.py  | 2 +-
 keras_nlp/models/bert/bert_classifier.py                 | 2 +-
 keras_nlp/models/bert/bert_masked_lm.py                  | 2 +-
 keras_nlp/models/deberta_v3/deberta_v3_classifier.py     | 2 +-
 keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py      | 2 +-
 keras_nlp/models/distil_bert/distil_bert_classifier.py   | 2 +-
 keras_nlp/models/distil_bert/distil_bert_masked_lm.py    | 2 +-
 keras_nlp/models/f_net/f_net_classifier.py               | 2 +-
 keras_nlp/models/f_net/f_net_masked_lm.py                | 2 +-
 keras_nlp/models/generative_task.py                      | 2 +-
 keras_nlp/models/gpt2/gpt2_causal_lm.py                  | 2 +-
 keras_nlp/models/gpt2/gpt2_causal_lm_preprocessor.py     | 2 +-
 keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm.py        | 2 +-
 .../models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py | 2 +-
 keras_nlp/models/opt/opt_causal_lm.py                    | 2 +-
 keras_nlp/models/opt/opt_causal_lm_preprocessor.py       | 4 ++--
 keras_nlp/models/preprocessor.py                         | 2 +-
 keras_nlp/models/roberta/roberta_classifier.py           | 2 +-
 keras_nlp/models/roberta/roberta_masked_lm.py            | 2 +-
 keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py    | 2 +-
 keras_nlp/samplers/contrastive_sampler.py                | 2 +-
 keras_nlp/tokenizers/byte_pair_tokenizer.py              | 2 +-
 keras_nlp/tokenizers/sentence_piece_tokenizer.py         | 2 +-
 30 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/keras_nlp/layers/modeling/masked_lm_head.py b/keras_nlp/layers/modeling/masked_lm_head.py
index 46c15f0280..349e696871 100644
--- a/keras_nlp/layers/modeling/masked_lm_head.py
+++ b/keras_nlp/layers/modeling/masked_lm_head.py
@@ -31,7 +31,7 @@ class MaskedLMHead(keras.layers.Layer):
        predict with shape `(batch_size, masks_per_sequence)`.
 
     The token encodings should usually be the last output of an encoder model,
-    and mask positions should be the interger positions you would like to
+    and mask positions should be the integer positions you would like to
     predict for the MaskedLM task.
 
     The layer will first gather the token encodings at the mask positions. These
diff --git a/keras_nlp/layers/modeling/rotary_embedding_test.py b/keras_nlp/layers/modeling/rotary_embedding_test.py
index 577d4a2447..16e8985772 100644
--- a/keras_nlp/layers/modeling/rotary_embedding_test.py
+++ b/keras_nlp/layers/modeling/rotary_embedding_test.py
@@ -60,7 +60,7 @@ def test_multi_dimension_layer_output_shape(self):
         inputs = keras.Input(shape=(None, seq_length, hidden_size))
         outputs = embedding_layer(inputs)
 
-        # When using muliple dimensions before sequence length, the output is
+        # When using multiple dimensions before sequence length, the output is
         # expected to be the same as the input shape in all dimensions.
         expected_output_shape = (None, None, seq_length, hidden_size)
         self.assertEqual(expected_output_shape, outputs.shape)
diff --git a/keras_nlp/layers/modeling/sine_position_encoding_test.py b/keras_nlp/layers/modeling/sine_position_encoding_test.py
index 7bcc665323..1c9bc4fb81 100644
--- a/keras_nlp/layers/modeling/sine_position_encoding_test.py
+++ b/keras_nlp/layers/modeling/sine_position_encoding_test.py
@@ -64,7 +64,7 @@ def test_multi_dimension_layer_output_shape(self):
         inputs = keras.Input(shape=(None, seq_length, hidden_size))
         outputs = pos_encoding(inputs)
 
-        # When using muliple dimensions before sequence length, the output is
+        # When using multiple dimensions before sequence length, the output is
         # expected to be the same as the input shape in all dimensions.
         expected_output_shape = (None, None, seq_length, hidden_size)
         self.assertEqual(expected_output_shape, outputs.shape)
diff --git a/keras_nlp/layers/preprocessing/multi_segment_packer.py b/keras_nlp/layers/preprocessing/multi_segment_packer.py
index 0f61d4751a..638d6f2b91 100644
--- a/keras_nlp/layers/preprocessing/multi_segment_packer.py
+++ b/keras_nlp/layers/preprocessing/multi_segment_packer.py
@@ -32,7 +32,7 @@ class MultiSegmentPacker(PreprocessingLayer):
     """Packs multiple sequences into a single fixed width model input.
 
     This layer packs multiple input sequences into a single fixed width sequence
-    containing start and end delimeters, forming an dense input suitable for a
+    containing start and end delimiters, forming a dense input suitable for a
     classification task for BERT and BERT-like models.
 
     Takes as input a tuple of token segments. Each tuple element should contain
diff --git a/keras_nlp/models/albert/albert_classifier.py b/keras_nlp/models/albert/albert_classifier.py
index e884d05d88..bc6d74a13a 100644
--- a/keras_nlp/models/albert/albert_classifier.py
+++ b/keras_nlp/models/albert/albert_classifier.py
@@ -76,7 +76,7 @@ class AlbertClassifier(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     classifier.backbone.trainable = False
     # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
diff --git a/keras_nlp/models/albert/albert_masked_lm.py b/keras_nlp/models/albert/albert_masked_lm.py
index 8084340aa9..be5a85bf62 100644
--- a/keras_nlp/models/albert/albert_masked_lm.py
+++ b/keras_nlp/models/albert/albert_masked_lm.py
@@ -71,7 +71,7 @@ class AlbertMaskedLM(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     masked_lm.backbone.trainable = False
     # Fit again.
     masked_lm.fit(x=features, batch_size=2)
diff --git a/keras_nlp/models/backbone.py b/keras_nlp/models/backbone.py
index 26cd4bf86c..280f0303d9 100644
--- a/keras_nlp/models/backbone.py
+++ b/keras_nlp/models/backbone.py
@@ -128,7 +128,7 @@ def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
 
         # If the subclass does not define from_preset, assign a wrapper so that
-        # each class can have an distinct docstring.
+        # each class can have a distinct docstring.
         if "from_preset" not in cls.__dict__:
 
             def from_preset(calling_cls, *args, **kwargs):
diff --git a/keras_nlp/models/bart/bart_seq_2_seq_lm_preprocessor.py b/keras_nlp/models/bart/bart_seq_2_seq_lm_preprocessor.py
index 0eb9a6cb44..d7ad12de84 100644
--- a/keras_nlp/models/bart/bart_seq_2_seq_lm_preprocessor.py
+++ b/keras_nlp/models/bart/bart_seq_2_seq_lm_preprocessor.py
@@ -209,7 +209,7 @@ def generate_preprocess(
         strings for values, tokenizes and packs the input, and computes a
         padding mask masking all inputs not filled in with a padded value.
 
-        Unlike calling the the layer for training, this method does not compute
+        Unlike calling the layer for training, this method does not compute
         labels and will never append a tokenizer.end_token_id to the end of
         the decoder sequence (as generation is expected to continue at the
         end of the inputted decoder prompt).
diff --git a/keras_nlp/models/bert/bert_classifier.py b/keras_nlp/models/bert/bert_classifier.py
index 719dab5dfc..d09685082f 100644
--- a/keras_nlp/models/bert/bert_classifier.py
+++ b/keras_nlp/models/bert/bert_classifier.py
@@ -77,7 +77,7 @@ class BertClassifier(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     classifier.backbone.trainable = False
     # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
diff --git a/keras_nlp/models/bert/bert_masked_lm.py b/keras_nlp/models/bert/bert_masked_lm.py
index d696f77fdd..8bbaba2269 100644
--- a/keras_nlp/models/bert/bert_masked_lm.py
+++ b/keras_nlp/models/bert/bert_masked_lm.py
@@ -70,7 +70,7 @@ class BertMaskedLM(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     masked_lm.backbone.trainable = False
     # Fit again.
     masked_lm.fit(x=features, batch_size=2)
diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
index dc2e8b1db8..992454c940 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py
@@ -86,7 +86,7 @@ class DebertaV3Classifier(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     classifier.backbone.trainable = False
     # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
diff --git a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py
index 2fcb8bf4d3..54b7059a2d 100644
--- a/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py
+++ b/keras_nlp/models/deberta_v3/deberta_v3_masked_lm.py
@@ -74,7 +74,7 @@ class DebertaV3MaskedLM(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     masked_lm.backbone.trainable = False
     # Fit again.
     masked_lm.fit(x=features, batch_size=2)
diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier.py b/keras_nlp/models/distil_bert/distil_bert_classifier.py
index 22a68dda8a..fc49d01b0a 100644
--- a/keras_nlp/models/distil_bert/distil_bert_classifier.py
+++ b/keras_nlp/models/distil_bert/distil_bert_classifier.py
@@ -88,7 +88,7 @@ class DistilBertClassifier(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     classifier.backbone.trainable = False
     # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
diff --git a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py
index 0f5299a589..1c69413164 100644
--- a/keras_nlp/models/distil_bert/distil_bert_masked_lm.py
+++ b/keras_nlp/models/distil_bert/distil_bert_masked_lm.py
@@ -74,7 +74,7 @@ class DistilBertMaskedLM(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     masked_lm.backbone.trainable = False
     # Fit again.
     masked_lm.fit(x=features, batch_size=2)
diff --git a/keras_nlp/models/f_net/f_net_classifier.py b/keras_nlp/models/f_net/f_net_classifier.py
index 1a84a83f48..2a573496b1 100644
--- a/keras_nlp/models/f_net/f_net_classifier.py
+++ b/keras_nlp/models/f_net/f_net_classifier.py
@@ -78,7 +78,7 @@ class FNetClassifier(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     classifier.backbone.trainable = False
     # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
diff --git a/keras_nlp/models/f_net/f_net_masked_lm.py b/keras_nlp/models/f_net/f_net_masked_lm.py
index 0355a235f2..c66b518d85 100644
--- a/keras_nlp/models/f_net/f_net_masked_lm.py
+++ b/keras_nlp/models/f_net/f_net_masked_lm.py
@@ -69,7 +69,7 @@ class FNetMaskedLM(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     masked_lm.backbone.trainable = False
     # Fit again.
     masked_lm.fit(x=features, batch_size=2)
diff --git a/keras_nlp/models/generative_task.py b/keras_nlp/models/generative_task.py
index 141775dc84..5726f32587 100644
--- a/keras_nlp/models/generative_task.py
+++ b/keras_nlp/models/generative_task.py
@@ -232,7 +232,7 @@ def generate(
                 `preprocessor` is attached to the model, `inputs` should match
                 the structure expected by the `preprocessor` layer. If a
                 `preprocessor` is not attached, `inputs` should match the
-                structure expected the the `backbone` model.
+                structure expected by the `backbone` model.
             max_length: Optional. int. The max length of the generated sequence.
                 Will default to the max configured `sequence_length` of the
                 `preprocessor`. If `preprocessor` is `None`, `inputs` should be
diff --git a/keras_nlp/models/gpt2/gpt2_causal_lm.py b/keras_nlp/models/gpt2/gpt2_causal_lm.py
index 6e508c43f8..e6c6eddb57 100644
--- a/keras_nlp/models/gpt2/gpt2_causal_lm.py
+++ b/keras_nlp/models/gpt2/gpt2_causal_lm.py
@@ -29,7 +29,7 @@
 
 @keras_nlp_export("keras_nlp.models.GPT2CausalLM")
 class GPT2CausalLM(GenerativeTask):
-    """An end-to-end GPT2 model for causal langauge modeling.
+    """An end-to-end GPT2 model for causal language modeling.
 
     A causal language model (LM) predicts the next token based on previous
     tokens. This task setup can be used to train the model unsupervised on
diff --git a/keras_nlp/models/gpt2/gpt2_causal_lm_preprocessor.py b/keras_nlp/models/gpt2/gpt2_causal_lm_preprocessor.py
index ddc8a58919..875d7eaa5d 100644
--- a/keras_nlp/models/gpt2/gpt2_causal_lm_preprocessor.py
+++ b/keras_nlp/models/gpt2/gpt2_causal_lm_preprocessor.py
@@ -139,7 +139,7 @@ def generate_preprocess(
         or tensor strings, tokenizes and packs the input, and computes a
         padding mask masking all inputs not filled in with a padded value.
 
-        Unlike calling the the layer for training, this method does not compute
+        Unlike calling the layer for training, this method does not compute
         labels and will never append a `tokenizer.end_token_id` to the end of
         the sequence (as generation is expected to continue at the end of the
         inputted prompt).
diff --git a/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm.py b/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm.py
index 6462a6a8db..5b22c346f8 100644
--- a/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm.py
+++ b/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm.py
@@ -26,7 +26,7 @@
 
 @keras_nlp_export("keras_nlp.models.GPTNeoXCausalLM")
 class GPTNeoXCausalLM(GenerativeTask):
-    """An end-to-end GPTNeoX model for causal langauge modeling.
+    """An end-to-end GPTNeoX model for causal language modeling.
 
     A causal language model (LM) predicts the next token based on previous
     tokens. This task setup can be used to train the model unsupervised on
diff --git a/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py b/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py
index 123c8582ff..6208b235fc 100644
--- a/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py
+++ b/keras_nlp/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py
@@ -107,7 +107,7 @@ def generate_preprocess(
         or tensor strings, tokenizes and packs the input, and computes a
         padding mask masking all inputs not filled in with a padded value.
 
-        Unlike calling the the layer for training, this method does not compute
+        Unlike calling the layer for training, this method does not compute
         labels and will never append a `tokenizer.end_token_id` to the end of
         the sequence (as generation is expected to continue at the end of the
         inputted prompt).
diff --git a/keras_nlp/models/opt/opt_causal_lm.py b/keras_nlp/models/opt/opt_causal_lm.py
index c649da8552..e855818376 100644
--- a/keras_nlp/models/opt/opt_causal_lm.py
+++ b/keras_nlp/models/opt/opt_causal_lm.py
@@ -29,7 +29,7 @@
 
 @keras_nlp_export("keras_nlp.models.OPTCausalLM")
 class OPTCausalLM(GenerativeTask):
-    """An end-to-end OPT model for causal langauge modeling.
+    """An end-to-end OPT model for causal language modeling.
 
     A causal language model (LM) predicts the next token based on previous
     tokens. This task setup can be used to train the model unsupervised on
diff --git a/keras_nlp/models/opt/opt_causal_lm_preprocessor.py b/keras_nlp/models/opt/opt_causal_lm_preprocessor.py
index 099cacd19c..6f856e682d 100644
--- a/keras_nlp/models/opt/opt_causal_lm_preprocessor.py
+++ b/keras_nlp/models/opt/opt_causal_lm_preprocessor.py
@@ -140,7 +140,7 @@ def generate_preprocess(
         or tensor strings, tokenizes and packs the input, and computes a
         padding mask masking all inputs not filled in with a padded value.
 
-        Unlike calling the the layer for training, this method does not compute
+        Unlike calling the layer for training, this method does not compute
         labels and will never append a `tokenizer.end_token_id` to the end of
         the sequence (as generation is expected to continue at the end of the
         inputted prompt).
@@ -162,7 +162,7 @@ def generate_postprocess(
         """Covert integer token output to strings for generation.
 
         This method reverses `generate_preprocess()`, by first removing all
-        padding and start/end tokens, and then converting the interger sequence
+        padding and start/end tokens, and then converting the integer sequence
         back to a string.
         """
         token_ids, padding_mask = x["token_ids"], x["padding_mask"]
diff --git a/keras_nlp/models/preprocessor.py b/keras_nlp/models/preprocessor.py
index fc2604b3a0..b5ea48a09b 100644
--- a/keras_nlp/models/preprocessor.py
+++ b/keras_nlp/models/preprocessor.py
@@ -125,7 +125,7 @@ def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
 
         # If the subclass does not define from_preset, assign a wrapper so that
-        # each class can have an distinct docstring.
+        # each class can have a distinct docstring.
         if "from_preset" not in cls.__dict__:
 
             def from_preset(calling_cls, *args, **kwargs):
diff --git a/keras_nlp/models/roberta/roberta_classifier.py b/keras_nlp/models/roberta/roberta_classifier.py
index c5e9f53de1..258480f9a9 100644
--- a/keras_nlp/models/roberta/roberta_classifier.py
+++ b/keras_nlp/models/roberta/roberta_classifier.py
@@ -78,7 +78,7 @@ class RobertaClassifier(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     classifier.backbone.trainable = False
     # Fit again.
     classifier.fit(x=features, y=labels, batch_size=2)
diff --git a/keras_nlp/models/roberta/roberta_masked_lm.py b/keras_nlp/models/roberta/roberta_masked_lm.py
index f80566c30d..84255446ff 100644
--- a/keras_nlp/models/roberta/roberta_masked_lm.py
+++ b/keras_nlp/models/roberta/roberta_masked_lm.py
@@ -72,7 +72,7 @@ class RobertaMaskedLM(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     masked_lm.backbone.trainable = False
     # Fit again.
     masked_lm.fit(x=features, batch_size=2)
diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py b/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py
index b0d9b7fc34..c4adf56e61 100644
--- a/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py
+++ b/keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm.py
@@ -75,7 +75,7 @@ class XLMRobertaMaskedLM(Task):
         optimizer=keras.optimizers.Adam(5e-5),
         jit_compile=True,
     )
-    # Access backbone programatically (e.g., to change `trainable`).
+    # Access backbone programmatically (e.g., to change `trainable`).
     masked_lm.backbone.trainable = False
     # Fit again.
     masked_lm.fit(x=features, batch_size=2)
diff --git a/keras_nlp/samplers/contrastive_sampler.py b/keras_nlp/samplers/contrastive_sampler.py
index 4e69e72a84..481fa484a1 100644
--- a/keras_nlp/samplers/contrastive_sampler.py
+++ b/keras_nlp/samplers/contrastive_sampler.py
@@ -185,7 +185,7 @@ def body(prompt, cache, index, logits, hidden_states):
                 (1 - self.alpha) * next_token_probabilities
                 - self.alpha * max_similarity_scores
             )
-            # Unflatten varibles to shape [batch_size, self.k, ...] for
+            # Unflatten variables to shape [batch_size, self.k, ...] for
             # gather purpose.
             unflat_score = unflatten_beams(accumulated_scores)
             unflat_prompt = unflatten_beams(prompt_beams)
diff --git a/keras_nlp/tokenizers/byte_pair_tokenizer.py b/keras_nlp/tokenizers/byte_pair_tokenizer.py
index 62fea2ea20..6ec140a113 100644
--- a/keras_nlp/tokenizers/byte_pair_tokenizer.py
+++ b/keras_nlp/tokenizers/byte_pair_tokenizer.py
@@ -660,7 +660,7 @@ def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
 
         # If the subclass does not define from_preset, assign a wrapper so that
-        # each class can have an distinct docstring.
+        # each class can have a distinct docstring.
         if "from_preset" not in cls.__dict__:
 
             def from_preset(calling_cls, *args, **kwargs):
diff --git a/keras_nlp/tokenizers/sentence_piece_tokenizer.py b/keras_nlp/tokenizers/sentence_piece_tokenizer.py
index 49657a0f47..2308c33506 100644
--- a/keras_nlp/tokenizers/sentence_piece_tokenizer.py
+++ b/keras_nlp/tokenizers/sentence_piece_tokenizer.py
@@ -282,7 +282,7 @@ def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
 
         # If the subclass does not define from_preset, assign a wrapper so that
-        # each class can have an distinct docstring.
+        # each class can have a distinct docstring.
         if "from_preset" not in cls.__dict__:
 
             def from_preset(calling_cls, *args, **kwargs):