Skip to content

Commit

Permalink
Merge pull request #122 from reworkd/fix_netloc
Browse files (browse the repository at this point in the history)
Replace `url.netloc` with `url.hostname` so that port numbers are stripped from domains
  • Loading branch information
KhoomeiK authored Sep 4, 2024
2 parents 7c40479 + d4202d5 commit 07f2568
Show file tree
Hide file tree
Showing 5 changed files with 588 additions and 8,875 deletions.
2 changes: 1 addition & 1 deletion bananalyzer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def main() -> int:
filters.append(lambda e: e.source.lower() == args.source_type.lower()) # type: ignore
if args.domain:
filters.append(
- lambda e: ".".join(urlparse(e.url).netloc.split(".")[-2:]) == args.domain
+ lambda e: ".".join(urlparse(e.url).hostname.split(".")[-2:]) == args.domain
)
if args.category:
filters.append(lambda e: e.category == args.category)
Expand Down
2 changes: 1 addition & 1 deletion bananalyzer/runner/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def test_{eval_.type}(self, page, result) -> None:
"""

def _generate_class_name(self, example: Example) -> str:
- domain = urlparse(example.url).netloc
+ domain = urlparse(example.url).hostname
domain = domain.replace(".", "_")
domain = domain.replace("-", "_")
if domain.startswith("www_"):
Expand Down
2 changes: 1 addition & 1 deletion scripts/har_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ async def create_end2end_examples(
detail_scraper: Optional[harambe.AsyncScraperType],
s3_bucket_name: Optional[str] = None,
) -> None:
- domain = urlparse(base_url).netloc.replace("www.", "").replace(".", "_")
+ domain = urlparse(base_url).hostname.replace("www.", "").replace(".", "_")
resource_path = (
f"s3://{s3_bucket_name}/{domain}.tar.gz"
if s3_bucket_name
Expand Down
Loading

0 comments on commit 07f2568

Please sign in to comment.