Skip to content

Commit

Permalink
[aliexpress] fix unexpected localized redirections
Browse files Browse the repository at this point in the history
  • Loading branch information
mazen-r committed Sep 23, 2024
1 parent 031fe8d commit aca5eac
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
10 changes: 8 additions & 2 deletions aliexpress-scraper/aliexpress.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@
# Aliexpress.com requires the Anti Scraping Protection (ASP) bypass feature.
# For more, see: https://scrapfly.io/docs/scrape-api/anti-scraping-protection
"asp": True,
"country": "US"
"country": "US",
# AliExpress returns different results depending on localization settings.
# To pin them, apply the desired localization settings in a browser, then copy the aep_usuc_f cookie value from devtools.
"headers": {
"cookie": "aep_usuc_f=site=glo&province=&city=&c_tp=USD&region=EG&b_locale=en_US&ae_u_p_s=2"
}
}


Expand Down Expand Up @@ -153,13 +158,14 @@ def parse_product(result: ScrapeApiResponse) -> Product:
})
seller_link = selector.xpath("//a[@data-pl='store-name']/@href").get()
seller_followers = selector.xpath("//div[contains(@class,'store-info')]/strong[2]/text()").get()
seller_followers = int(float(seller_followers.replace('K', '')) * 1000) if seller_followers and 'K' in seller_followers else int(seller_followers) if seller_followers else None
seller = {
"name": selector.xpath("//a[@data-pl='store-name']/text()").get(),
"link": seller_link.split("?")[0].replace("//", "") if seller_link else None,
"id": int(seller_link.split("store/")[-1].split("?")[0]) if seller_link else None,
"info": {
"positiveFeedback": selector.xpath("//div[contains(@class,'store-info')]/strong/text()").get(),
"followers": int (seller_followers) if seller_followers else None
"followers": seller_followers
}
}
return {
Expand Down
4 changes: 2 additions & 2 deletions aliexpress-scraper/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ async def test_product_scraping():
"type": "dict",
"schema": {
"question": {"type": "string"},
"answer": {"type": "string"}
"answer": {"type": "string", "nullable": True}
}
}
},
"seller": {
"type": "dict",
"schema": {
"name": {"type": "float"},
"name": {"type": "string"},
"link": {"type": "string"},
"id": {"type": "integer"},
"info": {
Expand Down

0 comments on commit aca5eac

Please sign in to comment.