Skip to content

Commit

Permalink
feat: Add BeautifulSoupParser type alias (#674)
Browse files Browse the repository at this point in the history
To avoid repeating the same Literal definitions.
  • Loading branch information
Pijukatel authored Nov 11, 2024
1 parent a002769 commit b2cf88f
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/crawlee/beautifulsoup_crawler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
try:
from ._beautifulsoup_crawler import BeautifulSoupCrawler
from ._beautifulsoup_crawler import BeautifulSoupCrawler, BeautifulSoupParser
from ._beautifulsoup_crawling_context import BeautifulSoupCrawlingContext
except ImportError as exc:
raise ImportError(
"To import anything from this subpackage, you need to install the 'beautifulsoup' extra."
"For example, if you use pip, run `pip install 'crawlee[beautifulsoup]'`.",
) from exc

__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext']
__all__ = ['BeautifulSoupCrawler', 'BeautifulSoupCrawlingContext', 'BeautifulSoupParser']
4 changes: 3 additions & 1 deletion src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
if TYPE_CHECKING:
from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs

BeautifulSoupParser = Literal['html.parser', 'lxml', 'xml', 'html5lib']


class BeautifulSoupCrawler(BasicCrawler[BeautifulSoupCrawlingContext]):
"""A web crawler for performing HTTP requests and parsing HTML/XML content.
Expand Down Expand Up @@ -61,7 +63,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
def __init__(
self,
*,
parser: Literal['html.parser', 'lxml', 'xml', 'html5lib'] = 'lxml',
parser: BeautifulSoupParser = 'lxml',
additional_http_error_status_codes: Iterable[int] = (),
ignore_http_error_status_codes: Iterable[int] = (),
**kwargs: Unpack[BasicCrawlerOptions[BeautifulSoupCrawlingContext]],
Expand Down

0 comments on commit b2cf88f

Please sign in to comment.