From aca5eacdbab9fde3bf83f09e039f1510487c00fb Mon Sep 17 00:00:00 2001 From: mazen-r Date: Mon, 23 Sep 2024 15:41:29 +0300 Subject: [PATCH] [aliexpress] fix unexpected localized redirections --- aliexpress-scraper/aliexpress.py | 10 ++++++++-- aliexpress-scraper/test.py | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/aliexpress-scraper/aliexpress.py b/aliexpress-scraper/aliexpress.py index 6b0caeb..80d046d 100644 --- a/aliexpress-scraper/aliexpress.py +++ b/aliexpress-scraper/aliexpress.py @@ -22,7 +22,12 @@ # Aliexpress.com requires Anti Scraping Protection bypass feature. # for more: https://scrapfly.io/docs/scrape-api/anti-scraping-protection "asp": True, - "country": "US" + "country": "US", + # aliexpress returns different results based on localization settings + # apply localization settings from the browser and then copy the aep_usuc_f cookie from devtools + "headers": { + "cookie": "aep_usuc_f=site=glo&province=&city=&c_tp=USD&region=EG&b_locale=en_US&ae_u_p_s=2" + } } @@ -153,13 +158,14 @@ def parse_product(result: ScrapeApiResponse) -> Product: }) seller_link = selector.xpath("//a[@data-pl='store-name']/@href").get() seller_followers = selector.xpath("//div[contains(@class,'store-info')]/strong[2]/text()").get() + seller_followers = int(float(seller_followers.replace('K', '')) * 1000) if seller_followers and 'K' in seller_followers else int(seller_followers) if seller_followers else None seller = { "name": selector.xpath("//a[@data-pl='store-name']/text()").get(), "link": seller_link.split("?")[0].replace("//", "") if seller_link else None, "id": int(seller_link.split("store/")[-1].split("?")[0]) if seller_link else None, "info": { "positiveFeedback": selector.xpath("//div[contains(@class,'store-info')]/strong/text()").get(), - "followers": int (seller_followers) if seller_followers else None + "followers": seller_followers } } return { diff --git a/aliexpress-scraper/test.py b/aliexpress-scraper/test.py index bbdff16..1dcd9c9 100644 
--- a/aliexpress-scraper/test.py +++ b/aliexpress-scraper/test.py @@ -94,14 +94,14 @@ async def test_product_scraping(): "type": "dict", "schema": { "question": {"type": "string"}, - "answer": {"type": "string"} + "answer": {"type": "string", "nullable": True} } } }, "seller": { "type": "dict", "schema": { - "name": {"type": "float"}, + "name": {"type": "string"}, "link": {"type": "string"}, "id": {"type": "integer"}, "info": {