From b2cf88ffea8d75808c9210850a03fcc70b0b9e3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Josef=20Proch=C3=A1zka?=
Date: Mon, 11 Nov 2024 12:57:53 +0100
Subject: [PATCH] feat: Add BeautifulSoupParser type alias (#674)

To avoid repeating the same Literal definitions.
---
 src/crawlee/beautifulsoup_crawler/__init__.py               | 4 ++--
 src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/crawlee/beautifulsoup_crawler/__init__.py b/src/crawlee/beautifulsoup_crawler/__init__.py
index 9fa733cc13..58a8e98deb 100644
--- a/src/crawlee/beautifulsoup_crawler/__init__.py
+++ b/src/crawlee/beautifulsoup_crawler/__init__.py
@@ -1,5 +1,5 @@
 try:
-    from ._beautifulsoup_crawler import BeautifulSoupCrawler
+    from ._beautifulsoup_crawler import BeautifulSoupCrawler, BeautifulSoupParser
     from ._beautifulsoup_crawling_context import BeautifulSoupCrawlingContext
 except ImportError as exc:
     raise ImportError(
@@ -7,4 +7,4 @@
         "For example, if you use pip, run `pip install 'crawlee[beautifulsoup]'`.",
     ) from exc
 
-__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext']
+__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext', 'BeautifulSoupParser']
diff --git a/src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py b/src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py
index 551bbbd87e..43c7959b43 100644
--- a/src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py
+++ b/src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py
@@ -21,6 +21,8 @@
 if TYPE_CHECKING:
     from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs
 
+BeautifulSoupParser = Literal['html.parser', 'lxml', 'xml', 'html5lib']
+
 
 class BeautifulSoupCrawler(BasicCrawler[BeautifulSoupCrawlingContext]):
     """A web crawler for performing HTTP requests and parsing HTML/XML content.
@@ -61,7 +63,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
     def __init__(
         self,
         *,
-        parser: Literal['html.parser', 'lxml', 'xml', 'html5lib'] = 'lxml',
+        parser: BeautifulSoupParser = 'lxml',
        additional_http_error_status_codes: Iterable[int] = (),
        ignore_http_error_status_codes: Iterable[int] = (),
        **kwargs: Unpack[BasicCrawlerOptions[BeautifulSoupCrawlingContext]],
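
A minimal sketch (not part of the patch) of how downstream code could reuse the newly exported BeautifulSoupParser alias instead of repeating the Literal; the make_crawler helper is hypothetical and only illustrates the annotation.

    # Hypothetical usage: annotate with the alias rather than re-typing the Literal.
    from crawlee.beautifulsoup_crawler import BeautifulSoupCrawler, BeautifulSoupParser

    def make_crawler(parser: BeautifulSoupParser = 'lxml') -> BeautifulSoupCrawler:
        # The alias covers the same parser names the crawler accepts:
        # 'html.parser', 'lxml', 'xml', 'html5lib'.
        return BeautifulSoupCrawler(parser=parser)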