diff --git a/pyproject.toml b/pyproject.toml index 9c14ad8..58a67bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,7 +106,8 @@ allowed-confusables = ["а", "с", "е", "З", "о", "г", "х", "у", "А", "С "tests/*.py" = ["S", "PLR2004", "ERA", "D", "ANN", "SLF"] "src/itmo_ai_timetable/db/migrations/versions/*.py" = ["N999"] "courses_processor/*.py" = ["PTH", "T201", "PLW2901", "RUF003", "INP001"] -"src/itmo_ai_timetable/gcal.py" = ["ERA"] +"src/itmo_ai_timetable/gcal.py" = ["ERA001"] +"src/itmo_ai_timetable/schedule_parser.py" = ["ERA001"] [tool.ruff.lint.isort] known-first-party = ["itmo_ai_timetable"] diff --git a/src/itmo_ai_timetable/cleaner.py b/src/itmo_ai_timetable/cleaner.py index 950bb54..4d3b255 100644 --- a/src/itmo_ai_timetable/cleaner.py +++ b/src/itmo_ai_timetable/cleaner.py @@ -1,4 +1,4 @@ -def course_name_cleaner(course: str) -> str | None: +def course_name_cleaner(course: str) -> str: course = course.strip() additional_info = [ @@ -14,6 +14,8 @@ def course_name_cleaner(course: str) -> str | None: replacements = { "A/B тестирование": "А/В тестирование", + "A\\B тестирование": "А/В тестирование", + "UNIX\\Linux системы": "UNIX/Linux системы", "Управление RnD командами": "Проведение научных исследований в области ИИ (Управление RnD командами)", "Проведение научных исследований в области ИИ": ( "Проведение научных исследований в области ИИ (Управление RnD командами)" @@ -34,8 +36,20 @@ def course_name_cleaner(course: str) -> str | None: "Создание технологического бизнеса: чек-лист для предпринимателей": "Создание технологического бизнеса", ( "High Tech Business Creation: check-list for entrepreneurs" - " / Создание технологического бизнеса: чек-лист для предпринимателей" + " // Создание технологического бизнеса: чек-лист для предпринимателей" ): "Создание технологического бизнеса", + ( + "High Tech Business Creation: check-list for entrepreneurs" + r" \ Создание технологического бизнеса: чек-лист для предпринимателей" + ): "Создание технологического 
бизнеса", + ( + "Компьютерная химия и моделирование химических систем \\ " + "Computational Chemistry and Modeling of Chemical Systems" + ): ( + "Компьютерная химия и моделирование химических систем /" + " Computational Chemistry and Modeling of Chemical Systems" + ), + "Основы машинного обучения (ml basic)": "Основы машинного обучения (ml-basic)", "Воркшоп по разработке автономного агента на основе LLM (Осенний семестр)": ( "Воркшоп по разработке автономного агента на основе LLM" ), diff --git a/src/itmo_ai_timetable/cli.py b/src/itmo_ai_timetable/cli.py index a7e0a05..daa4e5a 100644 --- a/src/itmo_ai_timetable/cli.py +++ b/src/itmo_ai_timetable/cli.py @@ -27,9 +27,17 @@ def create_args() -> argparse.Namespace: help="Путь к файлу excel", default="Расписание 1 курс весна 2024.xlsx", type=str, + required=True, + ) + schedule_parser.add_argument("--output_path", help="Папка для экспорта ics", type=str) + schedule_parser.add_argument("--sheet_name", help="Страница с расписанием в excel файле", type=str) + schedule_parser.add_argument( + "--db", + help="Сохранить результат в db", + action=argparse.BooleanOptionalAction, + type=bool, + default=False, ) - schedule_parser.add_argument("--output_path", help="Папка для экспорта ics", default="ics", type=str) - schedule_parser.add_argument("--sheet", help="Страница с расписанием в excel файле", default=0, type=int) selection_parser = subparsers.add_parser(SubparserName.SELECTION, help="Обработка excel с выборностью") selection_parser.add_argument( @@ -37,8 +45,9 @@ def create_args() -> argparse.Namespace: help="Путь к файлу excel", default="Таблица предвыборности осень 2024 (2 курс).xlsx", type=str, + required=True, ) - selection_parser.add_argument("--output_path", help="Папка для экспорта ics", default="ics", type=str) + selection_parser.add_argument("--output_path", help="Папка для экспорта", default="ics", type=str) selection_parser.add_argument("--sheet_name", help="Страница с расписанием в excel файле", type=str) 
selection_parser.add_argument("--course_row", help="Строка с заголовками", type=int) selection_parser.add_argument("--first_select_column", help="Первый столбец с выборностью (AA)", type=str) @@ -65,8 +74,10 @@ async def main() -> None: Path.mkdir(output_dir) match args.subparser_name: case SubparserName.SCHEDULE: - schedule = ScheduleParser(args.filepath, args.sheet_num).parse() - export_ics(schedule, output_path) + schedule = ScheduleParser(args.filepath, args.sheet_name).parse() + if args.db: + _ = await Repository.add_classes(schedule) + export_ics(schedule, output_dir) case SubparserName.SELECTION: results = SelectionParser( args.filepath, diff --git a/src/itmo_ai_timetable/db/migrations/versions/2024-09-07_e68d1c12a16b_add_courses.py b/src/itmo_ai_timetable/db/migrations/versions/2024-09-07_e68d1c12a16b_add_courses.py index 3cc0061..211a73c 100644 --- a/src/itmo_ai_timetable/db/migrations/versions/2024-09-07_e68d1c12a16b_add_courses.py +++ b/src/itmo_ai_timetable/db/migrations/versions/2024-09-07_e68d1c12a16b_add_courses.py @@ -104,6 +104,7 @@ def upgrade() -> None: "Цифровая обработка сигналов (Digital Signal Processing)", "Этика искусственного интеллекта", "Хакатон", + "Бизнес аналитика", ] op.bulk_insert(table, [{"name": course} for course in courses]) diff --git a/src/itmo_ai_timetable/schedule_parser.py b/src/itmo_ai_timetable/schedule_parser.py index 22cab8f..aa9b929 100644 --- a/src/itmo_ai_timetable/schedule_parser.py +++ b/src/itmo_ai_timetable/schedule_parser.py @@ -7,6 +7,7 @@ from openpyxl.worksheet.merge import MergedCellRange from openpyxl.worksheet.worksheet import Worksheet +from itmo_ai_timetable.cleaner import course_name_cleaner from itmo_ai_timetable.logger import get_logger from itmo_ai_timetable.schemes import Pair from itmo_ai_timetable.settings import Settings @@ -15,21 +16,23 @@ class ScheduleParser: - def __init__(self, path: str, sheet: int) -> None: + def __init__(self, path: str, sheet: str) -> None: self.settings = Settings() 
self.timezone = tz.gettz(self.settings.timezone) self.sheet = self._load_workbook(path, sheet) - def _load_workbook(self, path: str, sheet: int) -> Worksheet: + def _load_workbook(self, path: str, sheet: str) -> Worksheet: logger.info("Open file %s", path) - wb = openpyxl.load_workbook(path) - return wb.worksheets[sheet] + workbook = openpyxl.load_workbook(path) + return workbook[sheet] def parse(self) -> list[Pair]: logger.info("Start parse") pairs = [] for day_cell in self._get_days(): day = self._get_day(day_cell) + if day is None: + continue pairs.extend(self._parse_day(day, day_cell)) logger.info("End parse") return pairs @@ -41,10 +44,12 @@ def _get_days(self) -> list[MergedCellRange]: if cell_range.min_col == self.settings.days_column == cell_range.max_col ] - def _get_day(self, day_cell: MergedCellRange) -> datetime: + def _get_day(self, day_cell: MergedCellRange) -> datetime | None: day = self._get_first_cell_from_range(day_cell).value + if day is None: + return None if not isinstance(day, datetime): - raise ValueError(f"Day should be datetime, got {type(day)}") + raise ValueError(f"Day should be datetime, got {type(day)} in cell range {day_cell} with value {day}") day = day.astimezone(self.timezone) self._validate_day(day, day_cell) return day @@ -91,11 +96,13 @@ def _parse_row(self, cells: Iterable[Cell], pair_start: datetime, pair_end: date if cell.value is None or cell.value == "": continue title, pair_type, link = self._process_cell(cell) - title, parsed_pair_start, parsed_pair_end = self._find_time_in_cell(title) - if parsed_pair_start and parsed_pair_end: - pair_start = pair_start.replace(hour=parsed_pair_start[0], minute=parsed_pair_start[1]) - pair_end = pair_end.replace(hour=parsed_pair_end[0], minute=parsed_pair_end[1]) - if title: + + # currently there is no time in cells + # title, parsed_pair_start, parsed_pair_end = self._find_time_in_cell(title) + # if parsed_pair_start and parsed_pair_end: + # pair_start = 
pair_start.replace(hour=parsed_pair_start[0], minute=parsed_pair_start[1]) + # pair_end = pair_end.replace(hour=parsed_pair_end[0], minute=parsed_pair_end[1]) + if title and title not in self.settings.courses_to_skip: pairs.append( Pair( start_time=pair_start, @@ -118,6 +125,7 @@ def _process_cell(self, cell: Cell | MergedCell) -> tuple[str, str | None, str | link = cell.hyperlink.target if cell.hyperlink else None cell_title = self._clean_cell_value(cell) cell_title, key_word = self._find_key_words_in_cell(cell_title) + cell_title = course_name_cleaner(cell_title) return cell_title, key_word, link @@ -155,7 +163,7 @@ def _find_time_in_cell(self, cell: str) -> tuple[str, tuple[int, int] | None, tu "Публичные выступления 1 / Финансовая грамотность 3 17:00 - 19:15" should be 17:00 and 19:15 """ cell = cell.strip() - if not cell: + if not cell or ("-" not in cell and ":" not in cell): return cell, None, None start_time = end_time = None @@ -166,7 +174,7 @@ def _find_time_in_cell(self, cell: str) -> tuple[str, tuple[int, int] | None, tu try: hour, minute = map(int, time.split(":")) except ValueError as e: - raise ValueError(f"Invalid time format in cell: {time}") from e + raise ValueError(f"Invalid time format in cell: {cell}") from e if start_time is None: start_time = (hour, minute) diff --git a/src/itmo_ai_timetable/selection_parser.py b/src/itmo_ai_timetable/selection_parser.py index 1f84f79..352ceee 100644 --- a/src/itmo_ai_timetable/selection_parser.py +++ b/src/itmo_ai_timetable/selection_parser.py @@ -25,7 +25,7 @@ def __init__( self.name_column = name_column self.data_start_row = course_row + 1 - def parse(self) -> dict[str, list[str | None]]: + def parse(self) -> dict[str, list[str]]: courses = self._get_courses() return self._match_names_to_courses(courses) @@ -48,7 +48,7 @@ def _get_courses(self) -> list[tuple[str, str]]: courses.append((cell.column_letter, cell.value)) return courses - def _match_names_to_courses(self, courses: list[tuple[str, str]]) -> 
dict[str, list[str | None]]: + def _match_names_to_courses(self, courses: list[tuple[str, str]]) -> dict[str, list[str]]: matches = defaultdict(list) for row in self.sheet.iter_rows(min_row=self.data_start_row, min_col=1, max_col=1): name = row[0].value diff --git a/src/itmo_ai_timetable/settings.py b/src/itmo_ai_timetable/settings.py index a2d7467..24aa372 100644 --- a/src/itmo_ai_timetable/settings.py +++ b/src/itmo_ai_timetable/settings.py @@ -22,6 +22,8 @@ class Settings(BaseSettings): timezone: str = "Europe/Moscow" + courses_to_skip: list[str] = Field(["Выходной", "Demoday 12:00-15:30"], description="Courses to skip") + @property def database_settings(self) -> dict[str, str | int]: return { diff --git a/src/itmo_ai_timetable/transform_ics.py b/src/itmo_ai_timetable/transform_ics.py index 6afd671..1bcb970 100644 --- a/src/itmo_ai_timetable/transform_ics.py +++ b/src/itmo_ai_timetable/transform_ics.py @@ -22,6 +22,6 @@ def export_ics(pairs: list[Pair], path: Path) -> None: description=pair.link, ) c.events.add(e) - - with Path.open(path / f"{course}.ics", "w") as f: + course_file_name = course.replace("/", "-") + with Path.open(path / f"{course_file_name}.ics", "w") as f: f.writelines(c.serialize())