Merge pull request #27 from ScrapingAnt/feature/add-async-client-support

feature/add-async-client-support: done

megabotan authored Jun 30, 2022
2 parents 10694a2 + c8644f7 commit 05f3948

Showing 7 changed files with 258 additions and 41 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -1,5 +1,5 @@
init:
	pip3 install -e .[dev]
	pip3 install -e .[dev,async]

test:
	pytest -p no:cacheprovider
67 changes: 53 additions & 14 deletions README.md
@@ -1,9 +1,10 @@
# ScrapingAnt API client for Python

[![PyPI version](https://badge.fury.io/py/scrapingant-client.svg)](https://badge.fury.io/py/scrapingant-client)

`scrapingant-client` is the official library to access [ScrapingAnt API](https://docs.scrapingant.com) from your Python
applications. It provides useful features like parameters encoding to improve the ScrapingAnt usage experience. Requires
Python 3.6+.

<!-- toc -->

@@ -17,6 +18,7 @@ Requires python 3.6+.
<!-- tocstop -->

## Quick Start

```python3
from scrapingant_client import ScrapingAntClient

@@ -26,23 +28,37 @@ result = client.general_request('https://example.com')
print(result.content)
```

## Install

```shell
pip install scrapingant-client
```

If you need async support:

```shell
pip install scrapingant-client[async]
```

## API token

In order to get an API token, you'll need to register at [ScrapingAnt Service](https://app.scrapingant.com).

## API Reference

All public classes, methods and their parameters can be inspected in this API reference.

#### ScrapingAntClient(token)

Main class of this library.

| Param | Type |
| --- | --- |
| token | <code>string</code> |

* * *

#### ScrapingAntClient.general_request
#### ScrapingAntClient.general_request and ScrapingAntClient.general_request_async

https://docs.scrapingant.com/request-response-format#available-parameters

@@ -63,6 +79,7 @@ https://docs.scrapingant.com/request-response-format#available-parameters
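Both `general_request` and `general_request_async` take the same keyword parameters, as listed in the linked documentation. A minimal sketch using a few of them (the CSS selector value is illustrative; `return_text` and `browser` are shown at their library defaults):

```python
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# Wait for an element to appear before the page content is returned;
# the selector here is purely illustrative.
result = client.general_request(
    'https://example.com',
    wait_for_selector='h1',
    return_text=False,  # library default
    browser=True,  # library default
)
print(result.status_code)
```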
* * *

#### Cookie

Class defining a cookie. Currently it supports only name and value.

| Param | Type |
@@ -73,7 +90,8 @@ Class defining cookie. Currently it supports only name and value
* * *

#### Response

Class defining response from API.

| Param | Type |
| --- | --- |
@@ -83,11 +101,11 @@ Class defining response from API.

## Exceptions

`ScrapingantClientException` is the base exception class, used for all errors.

| Exception | Reason |
| --- | --- |
| ScrapingantInvalidTokenException | The API token is wrong or you have exceeded the API calls request limit |
| ScrapingantInvalidInputException | Invalid value provided. Please, look into error message for more info |
| ScrapingantInternalException | Something went wrong with the server side code. Try again later or contact ScrapingAnt support |
| ScrapingantSiteNotReachableException | The requested URL is not reachable. Please, check it locally |
@@ -106,7 +124,7 @@ from scrapingant_client import Cookie
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

result = client.general_request(
    'https://httpbin.org/cookies',
    cookies=[
        Cookie(name='cookieName1', value='cookieVal1'),
        Cookie(name='cookieName2', value='cookieVal2'),
@@ -122,6 +140,7 @@ response_cookies = result.cookies

```python
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

customJsSnippet = """
Expand All @@ -130,7 +149,7 @@ var htmlElement = document.getElementsByTagName('html')[0];
htmlElement.innerHTML = str;
"""
result = client.general_request(
    'https://example.com',
    js_snippet=customJsSnippet,
)
print(result.content)
@@ -145,14 +164,16 @@ client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

RETRIES_COUNT = 3


def parse_html(html: str):
    ...  # Implement your data extraction here


parsed_data = None
for retry_number in range(RETRIES_COUNT):
    try:
        scrapingant_response = client.general_request(
            'https://example.com',
        )
    except ScrapingantInvalidInputException as e:
        print(f'Got invalid input exception: {repr(e)}')
@@ -167,7 +188,6 @@ for retry_number in range(RETRIES_COUNT):
        break  # Data is parsed successfully, so we don't need to retry
    except Exception as e:
        print(f'Got exception while parsing data {repr(e)}')


if parsed_data is None:
    print(f'Failed to retrieve and parse data after {RETRIES_COUNT} tries')
@@ -184,7 +204,7 @@ from scrapingant_client import ScrapingAntClient
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

result = client.general_request(
    'https://httpbin.org/headers',
    headers={
        'test-header': 'test-value'
    }
@@ -193,13 +213,32 @@ print(result.content)

# HTTP basic auth example
result = client.general_request(
    'https://jigsaw.w3.org/HTTP/Basic/',
    headers={'Authorization': 'Basic Z3Vlc3Q6Z3Vlc3Q='}
)
print(result.content)
```

### Simple async example

```python3
import asyncio

from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')


async def main():
    # Scrape the example.com site.
    result = await client.general_request_async('https://example.com')
    print(result.content)


asyncio.run(main())
```
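
### Concurrent async requests

Each `general_request_async` call opens its own `httpx.AsyncClient`, so independent requests can run concurrently. A minimal sketch using `asyncio.gather` (the URL list is illustrative):

```python3
import asyncio

from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

URLS = ['https://example.com', 'https://httpbin.org/headers']


async def main():
    # Fire all requests at once and wait for every response.
    results = await asyncio.gather(
        *(client.general_request_async(url) for url in URLS)
    )
    for url, result in zip(URLS, results):
        print(url, result.status_code)


asyncio.run(main())
```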

## Useful links

- [ScrapingAnt API documentation](https://docs.scrapingant.com)
- [ScrapingAnt JS Client](https://github.com/scrapingant/scrapingant-client-js)
2 changes: 1 addition & 1 deletion scrapingant_client/__init__.py
@@ -1,4 +1,4 @@
__version__ = "0.3.9"
__version__ = "1.0.0"

from scrapingant_client.client import ScrapingAntClient
from scrapingant_client.cookie import Cookie
107 changes: 87 additions & 20 deletions scrapingant_client/client.py
@@ -25,24 +25,23 @@ def __init__(self, token: str):
        self.token = token
        self.requests_session = requests.Session()
        version = scrapingant_client.__version__
        user_agent = f'ScrapingAnt Client/{version} ({sys.platform}; Python/{platform.python_version()});'
        self.user_agent = f'ScrapingAnt Client/{version} ({sys.platform}; Python/{platform.python_version()});'
        self.requests_session.headers.update({
            'x-api-key': self.token,
            'User-Agent': user_agent,
            'User-Agent': self.user_agent,
        })

    def general_request(
    def _form_payload(
        self,
        url: str,
        cookies: Optional[List[Cookie]] = None,
        headers: Optional[Dict[str, str]] = None,
        js_snippet: Optional[str] = None,
        proxy_type: ProxyType = ProxyType.datacenter,
        proxy_country: Optional[str] = None,
        return_text: bool = False,
        wait_for_selector: Optional[str] = None,
        browser: bool = True,
    ) -> Response:
    ) -> Dict:
        request_data = {'url': url}
        if cookies is not None:
            request_data['cookies'] = cookies_list_to_string(cookies)
@@ -56,29 +55,97 @@ def general_request(
        request_data['wait_for_selector'] = wait_for_selector
        request_data['return_text'] = return_text
        request_data['browser'] = browser
        return request_data

        response = self.requests_session.post(
            SCRAPINGANT_API_BASE_URL + '/general',
            json=request_data,
            headers=convert_headers(headers),
        )
        if response.status_code == 403:
    def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
        if response_status_code == 403:
            raise ScrapingantInvalidTokenException()
        elif response.status_code == 404:
        elif response_status_code == 404:
            raise ScrapingantSiteNotReachableException(url)
        elif response.status_code == 422:
            raise ScrapingantInvalidInputException(response.text)
        elif response.status_code == 423:
        elif response_status_code == 422:
            raise ScrapingantInvalidInputException(response_data)
        elif response_status_code == 423:
            raise ScrapingantDetectedException()
        elif response.status_code == 500:
        elif response_status_code == 500:
            raise ScrapingantInternalException()
        json_response = response.json()
        content = json_response['content']
        cookies_string = json_response['cookies']
        status_code = json_response['status_code']
        content = response_data['content']
        cookies_string = response_data['cookies']
        status_code = response_data['status_code']
        cookies_list = cookies_list_from_string(cookies_string)
        return Response(
            content=content,
            cookies=cookies_list,
            status_code=status_code
        )

    def general_request(
        self,
        url: str,
        cookies: Optional[List[Cookie]] = None,
        headers: Optional[Dict[str, str]] = None,
        js_snippet: Optional[str] = None,
        proxy_type: ProxyType = ProxyType.datacenter,
        proxy_country: Optional[str] = None,
        return_text: bool = False,
        wait_for_selector: Optional[str] = None,
        browser: bool = True,
    ) -> Response:
        request_data = self._form_payload(
            url=url,
            cookies=cookies,
            js_snippet=js_snippet,
            proxy_type=proxy_type,
            proxy_country=proxy_country,
            return_text=return_text,
            wait_for_selector=wait_for_selector,
            browser=browser,
        )
        response = self.requests_session.post(
            SCRAPINGANT_API_BASE_URL + '/general',
            json=request_data,
            headers=convert_headers(headers),
        )
        response_status_code = response.status_code
        response_data = response.json()
        parsed_response: Response = self._parse_response(response_status_code, response_data, url)
        return parsed_response

    async def general_request_async(
        self,
        url: str,
        cookies: Optional[List[Cookie]] = None,
        headers: Optional[Dict[str, str]] = None,
        js_snippet: Optional[str] = None,
        proxy_type: ProxyType = ProxyType.datacenter,
        proxy_country: Optional[str] = None,
        return_text: bool = False,
        wait_for_selector: Optional[str] = None,
        browser: bool = True,
    ) -> Response:
        import httpx

        request_data = self._form_payload(
            url=url,
            cookies=cookies,
            js_snippet=js_snippet,
            proxy_type=proxy_type,
            proxy_country=proxy_country,
            return_text=return_text,
            wait_for_selector=wait_for_selector,
            browser=browser,
        )
        async with httpx.AsyncClient(
            headers={
                'x-api-key': self.token,
                'User-Agent': self.user_agent,
            }
        ) as client:
            response = await client.post(
                SCRAPINGANT_API_BASE_URL + '/general',
                json=request_data,
                headers=convert_headers(headers),
            )
        response_status_code = response.status_code
        response_data = response.json()
        parsed_response: Response = self._parse_response(response_status_code, response_data, url)
        return parsed_response
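
After this split, `general_request` and `general_request_async` differ only in transport: both build the body with `_form_payload` and funnel the status code and parsed JSON through `_parse_response`. A minimal sketch of the shared error mapping, calling the private helper directly purely for illustration (the cookie-string format passed in is an assumption):

```python
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='some_token')

# 4xx/5xx status codes are mapped to the typed exceptions from the README...
try:
    client._parse_response(422, {'detail': 'wrong url'}, 'https://example.com')
except Exception as e:
    print(repr(e))  # ScrapingantInvalidInputException(...)

# ...while a success payload is unpacked into a Response object.
result = client._parse_response(
    200,
    {'content': '<html></html>', 'cookies': 'name=value', 'status_code': 200},
    'https://example.com',
)
print(result.status_code)  # 200
```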
13 changes: 9 additions & 4 deletions setup.py
@@ -38,9 +38,14 @@
    install_requires=['requests>=2,<3'],
    extras_require={
        'dev': [
            'pytest>=6,<7',
            'flake8>=3,<4',
            'responses>=0,<1'
        ]
            'pytest>=7,<8',
            'flake8>=4,<5',
            'responses>=0,<1',
            'pytest-httpx>=0,<1',
            'pytest-asyncio>=0,<1',
        ],
        'async': [
            'httpx<1',
        ],
    },
)
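
Since `httpx` sits behind the new `async` extra and `general_request_async` imports it lazily, a plain `pip install scrapingant-client` keeps working; the missing dependency only surfaces at call time. A small sketch of how downstream code might guard for this (the exit message is illustrative):

```python
try:
    import httpx  # noqa: F401 -- provided by `pip install scrapingant-client[async]`
except ImportError:
    raise SystemExit('async support missing; install scrapingant-client[async]')
```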
2 changes: 1 addition & 1 deletion tests/test_exceptions.py
@@ -28,7 +28,7 @@ def test_invalid_input():
    client = ScrapingAntClient(token='some_token')
    with pytest.raises(ScrapingantInvalidInputException) as e:
        client.general_request('bad_url')
    assert '{"detail": "wrong url"}' in str(e)
    assert 'wrong url' in str(e)


@responses.activate
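
The relaxed assertion matches the new client behavior: `ScrapingantInvalidInputException` is now raised with the parsed JSON dict (`response_data`) instead of the raw `response.text`, so the exact-JSON-string check no longer applies. A hedged sketch of such a test with the `responses` mock library, matching any URL so the endpoint constant stays out of the test (the top-level exception import is an assumption based on the README examples):

```python
import re

import pytest
import responses

from scrapingant_client import ScrapingAntClient, ScrapingantInvalidInputException


@responses.activate
def test_invalid_input_message():
    # Intercept the POST regardless of the exact API endpoint URL.
    responses.add(responses.POST, re.compile('.*'), json={'detail': 'wrong url'}, status=422)
    client = ScrapingAntClient(token='some_token')
    with pytest.raises(ScrapingantInvalidInputException) as e:
        client.general_request('bad_url')
    # The exception message contains the dict contents, so assert on a substring.
    assert 'wrong url' in str(e)
```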