Skip to content

Commit

Permalink
Fix eval check
Browse files (browse the repository at this point in the history)
  • Loading branch information
TJ-Solergibert committed Sep 4, 2024
1 parent 8e6f8ab commit 1969526
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions src/nanotron/config/config.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -111,7 +111,9 @@ def __post_init__(self):
class MultilingualNanosetDatasetsArgs:
training_folder: Union[str, dict, List[str]]
validation_folder: Optional[Union[str, List[str]]]
languages: Optional[List[str]] # NOTE(tj.solergibert) Required for 1. Aggregating the result 2. Reporting to WANDB
languages: List[
str
] # NOTE(tj.solergibert) Required for 1. Aggregating the result 2. Embed lang information into the model 3. Reporting to WANDB

def __post_init__(self):
if isinstance(self.training_folder, str): # Case 1: 1 Dataset folder
Expand All @@ -133,15 +135,15 @@ def __post_init__(self):
len(self.training_folder) == len(self.validation_folder) if self.validation_folder else True
), f"The sizes of training_folder and validation_folder mismatch ({len(self.training_folder)} vs {len(self.validation_folder)})"

if not self.languages and self.validation_folder:
raise ValueError(f"You must specify languages to perform the validation step w/ {self.validation_folder}")
if not self.languages:
raise ValueError("You must specify the languages of each dataset")


@dataclass
class DataArgs:
"""Arguments related to the data and data files processing"""

dataset: Union[MultilingualNanosetDatasetsArgs]
dataset: Union[PretrainDatasetsArgs, MultilingualNanosetDatasetsArgs]
seed: Optional[int]
num_loading_workers: Optional[int] = 1

Expand Down

0 comments on commit 1969526

Please sign in to comment.