scrapingant-client
is the official library to access ScrapingAnt API from your Python
applications. It provides useful features like parameter encoding to improve the ScrapingAnt usage experience. Requires
Python 3.6+.
from scrapingant_client import ScrapingAntClient

# Create a client authenticated with your API token
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# Fetch the example.com site through the ScrapingAnt API
response = client.general_request('https://example.com')
print(response.content)
pip install scrapingant-client
If you need async support:
pip install scrapingant-client[async]
In order to get an API token, you'll need to register at the ScrapingAnt Service
All public classes, methods and their parameters can be inspected in this API reference.
Main class of this library.
Param | Type |
---|---|
token | string |
- ScrapingAntClient.general_request
- ScrapingAntClient.general_request_async
- ScrapingAntClient.markdown_request
- ScrapingAntClient.markdown_request_async
https://docs.scrapingant.com/request-response-format#available-parameters
Param | Type | Default |
---|---|---|
url | string |
|
method | string |
GET |
cookies | List[Cookie] |
None |
headers | Dict[str, str] |
None |
js_snippet | string |
None |
proxy_type | ProxyType |
datacenter |
proxy_country | str |
None |
wait_for_selector | str |
None |
browser | boolean |
True |
return_page_source | boolean |
False |
data | same as requests param 'data' | None |
json | same as requests param 'json' | None |
IMPORTANT NOTE: js_snippet
will be encoded to Base64 automatically by the ScrapingAnt client library.
Class defining a cookie. Currently, it supports only the name and value attributes.
Param | Type |
---|---|
name | string |
value | string |
Class defining response from API.
Param | Type |
---|---|
content | string |
cookies | List[Cookie] |
status_code | int |
text | string |
ScrapingantClientException
is the base exception class, used for all errors.
Exception | Reason |
---|---|
ScrapingantInvalidTokenException | The API token is wrong or you have exceeded the API calls request limit |
ScrapingantInvalidInputException | Invalid value provided. Please, look into error message for more info |
ScrapingantInternalException | Something went wrong with the server side code. Try again later or contact ScrapingAnt support |
ScrapingantSiteNotReachableException | The requested URL is not reachable. Please, check it locally |
ScrapingantDetectedException | The anti-bot detection system has detected the request. Please, retry or change the request settings. |
ScrapingantTimeoutException | Got timeout while communicating with Scrapingant servers. Check your network connection. Please try later or contact support |
from scrapingant_client import Cookie, ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# Cookies to attach to the outgoing request
request_cookies = [
    Cookie(name='cookieName1', value='cookieVal1'),
    Cookie(name='cookieName2', value='cookieVal2'),
]
response = client.general_request(
    'https://httpbin.org/cookies',
    cookies=request_cookies,
)
print(response.content)

# The response carries a list of Cookie objects,
# which can be reused in subsequent requests
response_cookies = response.cookies
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# Custom JavaScript to execute on the page.
# The client library Base64-encodes it automatically before sending.
# (Renamed from camelCase `customJsSnippet` to follow PEP 8 snake_case.)
custom_js_snippet = """
var str = 'Hello, world!';
var htmlElement = document.getElementsByTagName('html')[0];
htmlElement.innerHTML = str;
"""
result = client.general_request(
    'https://example.com',
    js_snippet=custom_js_snippet,
)
print(result.content)
from scrapingant_client import ScrapingAntClient, ScrapingantClientException, ScrapingantInvalidInputException

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

RETRIES_COUNT = 3


def parse_html(html: str):
    ...  # Implement your data extraction here


parsed_data = None
for retry_number in range(RETRIES_COUNT):
    try:
        scrapingant_response = client.general_request(
            'https://example.com',
        )
    except ScrapingantInvalidInputException as e:
        # Bug fix: was f'...{{repr(e)}}', whose doubled braces printed the
        # literal text "{repr(e)}" instead of the exception representation.
        print(f'Got invalid input exception: {repr(e)}')
        break  # We are not retrying if request params are not valid
    except ScrapingantClientException as e:
        print(f'Got ScrapingAnt exception {repr(e)}')
    except Exception as e:
        print(f'Got unexpected exception {repr(e)}')  # please report this kind of exceptions by creating a new issue
    else:
        try:
            parsed_data = parse_html(scrapingant_response.content)
            break  # Data is parsed successfully, so we don't need to retry
        except Exception as e:
            print(f'Got exception while parsing data {repr(e)}')

if parsed_data is None:
    print(f'Failed to retrieve and parse data after {RETRIES_COUNT} tries')
    # Can sleep and retry later, or stop the script execution, and research the reason
else:
    print(f'Successfully parsed data: {parsed_data}')
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# Send a custom header with the request
custom_headers = {
    'test-header': 'test-value',
}
response = client.general_request('https://httpbin.org/headers', headers=custom_headers)
print(response.content)

# HTTP Basic auth, supplied via the Authorization header
auth_headers = {'Authorization': 'Basic Z3Vlc3Q6Z3Vlc3Q='}
response = client.general_request('https://jigsaw.w3.org/HTTP/Basic/', headers=auth_headers)
print(response.content)
import asyncio

from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')


async def main():
    # Asynchronously scrape the example.com site
    response = await client.general_request_async('https://example.com')
    print(response.content)


asyncio.run(main())
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# POST request carrying a JSON body
json_response = client.general_request(
    url="https://httpbin.org/post",
    method="POST",
    json={"test": "test"},
)
print(json_response.content)

# POST request carrying a raw bytes body
bytes_response = client.general_request(
    url="https://httpbin.org/post",
    method="POST",
    data=b'test_bytes',
)
print(bytes_response.content)
from scrapingant_client import ScrapingAntClient
client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
# Request the page rendered as markdown
# (previous comment was a copy-paste error: this is not a POST/JSON request)
result = client.markdown_request(
    url="https://example.com",
)
print(result.markdown)