diff --git a/src/nanotron/config/lighteval_config.py b/src/nanotron/config/lighteval_config.py
index b5f12059..208091c9 100644
--- a/src/nanotron/config/lighteval_config.py
+++ b/src/nanotron/config/lighteval_config.py
@@ -51,6 +51,7 @@ def __post_init__(self):
 class LightEvalTasksArgs:
     """Arguments related to tasks for LightEval"""
 
+    langs: Optional[str] = None
     tasks: Optional[str] = None
     custom_tasks: Optional[str] = None
     max_samples: Optional[int] = None
diff --git a/src/nanotron/models/llama.py b/src/nanotron/models/llama.py
index ecb26fd2..2c6ddc01 100644
--- a/src/nanotron/models/llama.py
+++ b/src/nanotron/models/llama.py
@@ -757,7 +757,7 @@ def forward(
         self,
         input_ids: Union[torch.Tensor, TensorPointer],  # [batch_size, seq_length]
         input_mask: Union[torch.Tensor, TensorPointer],  # [batch_size, seq_length]
-        lang_code: Union[torch.Tensor, TensorPointer],  # [batch_size, 1]
+        lang_code: Union[torch.Tensor, TensorPointer] = None,  # [batch_size, 1]
     ):
         return self.forward_with_hidden_states(input_ids=input_ids, input_mask=input_mask, lang_code=lang_code)[0]
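
For context (not part of the patch): a minimal usage sketch of the two changes, assuming `LightEvalTasksArgs` is a plain dataclass with keyword construction (as its field-with-default style suggests) and that `model` is an already-instantiated `LlamaModel`; the `langs` and `tasks` values below are illustrative placeholders, not real task strings.

```python
# Hypothetical sketch, not part of the diff.
from nanotron.config.lighteval_config import LightEvalTasksArgs

# `langs` is a new optional field defaulting to None, so both forms are valid.
tasks_args = LightEvalTasksArgs(langs="en,fr", tasks="<task-spec>")   # with language codes
legacy_args = LightEvalTasksArgs(tasks="<task-spec>")                 # unchanged existing configs

# With `lang_code` now defaulting to None, existing callers that don't pass a
# language-code tensor keep working; lang_code is simply forwarded as None to
# forward_with_hidden_states().
# outputs = model(input_ids=input_ids, input_mask=input_mask)
# outputs = model(input_ids=input_ids, input_mask=input_mask, lang_code=lang_code)
```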