Skip to content

Commit

Permalink
add axios-retry to retry client connection issues
Browse files Browse the repository at this point in the history
update package versions, tests and docs
  • Loading branch information
Granitosaurus committed Dec 18, 2023
1 parent 3b113eb commit d39eae8
Show file tree
Hide file tree
Showing 10 changed files with 1,274 additions and 4,602 deletions.
68 changes: 33 additions & 35 deletions __tests__/client.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ import * as errors from '../src/errors.js';
import { ScrapeConfig } from '../src/scrapeconfig.js';
import { describe, it, expect, jest, beforeEach } from '@jest/globals';

jest.mock('axios');

const mockedAxios = axios as jest.Mocked<typeof axios>;

function resultFactory(params: {
url?: string;
Expand Down Expand Up @@ -39,7 +36,7 @@ describe('concurrent scrape', () => {
// mock axios to return /account data and 2 types of results:
// - success for /success endpoints
// - ASP failure for /failure endpoints
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 39 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 39 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
if (config.url.includes('/account')) {
return {
status: 200,
Expand Down Expand Up @@ -71,7 +68,7 @@ describe('concurrent scrape', () => {
});

beforeEach(() => {
mockedAxios.request.mockClear(); // clear all mock meta on each test
jest.spyOn(axios, 'request').mockClear(); // clear all mock meta on each test
});

it('success', async () => {
Expand All @@ -91,7 +88,7 @@ describe('concurrent scrape', () => {
expect(results.length).toBe(5);
expect(errors.length).toBe(5);
// 10 requests and 1 account info
expect(mockedAxios.request).toHaveBeenCalledTimes(11);
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(11);
}, 5_000);

it('success with explicit concurrency', async () => {
Expand All @@ -111,7 +108,7 @@ describe('concurrent scrape', () => {
expect(results.length).toBe(5);
expect(errors.length).toBe(5);
// 10 requests only - presumably no account info request is made when concurrency is set explicitly (TODO confirm)
expect(mockedAxios.request).toHaveBeenCalledTimes(10);
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(10);
}, 2_000);
});

Expand All @@ -120,12 +117,12 @@ describe('scrape', () => {
const client = new ScrapflyClient({ key: KEY });

beforeEach(() => {
mockedAxios.request.mockClear(); // clear all mock meta on each test
jest.spyOn(axios, 'request').mockClear(); // clear all mock meta on each test
});

it('GET success', async () => {
const url = 'https://httpbin.dev/json';
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 125 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 125 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
// Ensure the URL matches the pattern
expect(config.url).toMatch(client.HOST + '/scrape');
expect(config.method).toEqual('GET');
Expand All @@ -144,12 +141,12 @@ describe('scrape', () => {
expect(result.context.asp).toBe(false);
expect(result.uuid).toBe('1234');
// a single request:
expect(mockedAxios.request).toHaveBeenCalledTimes(1);
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(1);
});

it('GET complex urls', async () => {
const url = 'https://httpbin.dev/anything/?website=https://httpbin.dev/anything';
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 149 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 149 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
// Ensure the URL matches the pattern
expect(config.url).toMatch(client.HOST + '/scrape');
expect(config.method).toEqual('GET');
Expand All @@ -168,12 +165,12 @@ describe('scrape', () => {
expect(result.context.asp).toBe(false);
expect(result.uuid).toBe('1234');
// a single request:
expect(mockedAxios.request).toHaveBeenCalledTimes(1);
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(1);
});

it('POST success', async () => {
const url = 'https://httpbin.dev/json';
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 173 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 173 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
// Ensure the URL matches the pattern
expect(config.url).toMatch(client.HOST + '/scrape');
expect(config.method).toEqual('POST');
Expand All @@ -198,16 +195,17 @@ describe('scrape', () => {
expect(result.config.url).toBe('https://httpbin.dev/json');
expect(result.context.asp).toBe(false);
expect(result.uuid).toBe('1234');
expect(mockedAxios.request).toHaveBeenCalledTimes(1);
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(1);
});

it('unhandled errors propagate up', async () => {
jest.spyOn(axios, 'request').mockReset();
const url = 'https://httpbin.dev/json';
mockedAxios.request.mockImplementation(() => Promise.reject(new Error('Network Error')));
jest.spyOn(axios, 'request').mockImplementation(() => Promise.reject(new Error('Foo Error')));

await expect(async () => {
await client.scrape(new ScrapeConfig({ url }));
}).rejects.toThrow('Network Error');
}).rejects.toThrow('Foo Error');
});
// it('handles ')

Check warning on line 210 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Some tests seem to be commented

Check warning on line 210 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Some tests seem to be commented
});
Expand All @@ -230,12 +228,12 @@ describe('client errors', () => {
const client = new ScrapflyClient({ key: KEY });

beforeEach(() => {
mockedAxios.request.mockClear(); // clear all mock meta on each test
jest.spyOn(axios, 'request').mockClear(); // clear all mock meta on each test
});

it('raises ApiHttpServerError on 500 and success', async () => {
const url = 'https://httpbin.dev/json';
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 236 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 236 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
return resultFactory({
url: config.url,
status_code: 500,
Expand All @@ -249,7 +247,7 @@ describe('client errors', () => {

it('raises BadApiKeyError on 401', async () => {
const url = 'https://httpbin.dev/json';
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 250 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 250 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
return resultFactory({
url: config.url,
status_code: 401,
Expand All @@ -262,7 +260,7 @@ describe('client errors', () => {
});
it('raises TooManyRequests on 429 and success', async () => {
const url = 'https://httpbin.dev/json';
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 263 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 263 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
return resultFactory({
url: config.url,
status_code: 429,
Expand All @@ -273,7 +271,7 @@ describe('client errors', () => {
await expect(client.scrape(new ScrapeConfig({ url }))).rejects.toThrow(errors.TooManyRequests);
});
it('raises ScrapflyScrapeError on ::SCRAPE:: resource and success', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {

Check warning on line 274 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type

Check warning on line 274 in __tests__/client.test.ts

View workflow job for this annotation

GitHub Actions / build

Unexpected any. Specify a different type
return resultFactory({
url: config.url,
status: 'ERR::SCRAPE::BAD_PROTOCOL',
Expand All @@ -286,7 +284,7 @@ describe('client errors', () => {
});

it('raises ScrapflyWebhookError on ::WEBHOOK:: resource and success', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
status: 'ERR::WEBHOOK::DISABLED ',
Expand All @@ -298,7 +296,7 @@ describe('client errors', () => {
);
});
it('raises ScrapflyProxyError on ERR::PROXY::POOL_NOT_FOUND resource and success', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
status: 'ERR::PROXY::POOL_NOT_FOUND ',
Expand All @@ -311,7 +309,7 @@ describe('client errors', () => {
});

it('raises ScrapflyScheduleError on ERR::SCHEDULE::DISABLED resource and success', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
status: 'ERR::SCHEDULE::DISABLED',
Expand All @@ -324,7 +322,7 @@ describe('client errors', () => {
});

it('raises ScrapflyAspError on ERR::ASP::SHIELD_ERROR resource and success', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
status: 'ERR::ASP::SHIELD_ERROR',
Expand All @@ -337,7 +335,7 @@ describe('client errors', () => {
});

it('raises ScrapflySessionError on ERR::SESSION::CONCURRENT_ACCESS resource and success', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
status: 'ERR::SESSION::CONCURRENT_ACCESS',
Expand All @@ -350,7 +348,7 @@ describe('client errors', () => {
});

it('raises ApiHttpClientError on success and unknown status', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
status: 'ERR::NEW',
Expand All @@ -362,7 +360,7 @@ describe('client errors', () => {
);
});
it('raises UpstreamHttpServerError on failure, ERR::SCRAPE::BAD_UPSTREAM_RESPONSE and >=500', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
success: false,
Expand All @@ -375,7 +373,7 @@ describe('client errors', () => {
);
});
it('raises UpstreamHttpClientError on failure, ERR::SCRAPE::BAD_UPSTREAM_RESPONSE and 4xx status', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
success: false,
Expand All @@ -398,7 +396,7 @@ describe('client errors', () => {
SESSION: errors.ScrapflySessionError,
};
for (const [resource, err] of Object.entries(resourceErrMap)) {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
success: false,
Expand All @@ -410,7 +408,7 @@ describe('client errors', () => {
});

it('raises ScrapflyError on unhandled failure', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
success: false,
Expand All @@ -423,7 +421,7 @@ describe('client errors', () => {
);
});
it('raises on unhandled failure', async () => {
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
return resultFactory({
url: config.url,
success: false,
Expand All @@ -436,19 +434,19 @@ describe('client errors', () => {
);
});
it('account retrieval status unhandled code (e.g. 404)', async () => {
mockedAxios.request.mockRejectedValue({
jest.spyOn(axios, 'request').mockRejectedValue({
response: { status: 404, data: {} },
});
await expect(client.account()).rejects.toThrow(errors.HttpError);
});
it('account retrieval bad api key (status 401)', async () => {
mockedAxios.request.mockRejectedValue({
jest.spyOn(axios, 'request').mockRejectedValue({
response: { status: 401, data: {} },
});
await expect(client.account()).rejects.toThrow(errors.BadApiKeyError);
});
it('scrape bad api key (status 401)', async () => {
mockedAxios.request.mockRejectedValue({
jest.spyOn(axios, 'request').mockRejectedValue({
response: { status: 401, data: {} },
});
await expect(client.scrape(new ScrapeConfig({ url: 'https://httpbin.dev/json' }))).rejects.toThrow(
Expand Down
7 changes: 6 additions & 1 deletion __tests__/result.test.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import * as cheerio from 'cheerio';
import * as fs from 'fs';
import { ScrapeResult } from '../src/result.js';
import * as errors from '../src/errors.js';
import { describe, it, expect } from '@jest/globals';
import { describe, it, expect, jest } from '@jest/globals';

describe('cheerio selector', () => {
it('lazy loads and caches itself', () => {
    // Checks that repeated .selector() calls on one ScrapeResult trigger a single cheerio.load call.
    const response = JSON.parse(fs.readFileSync('__tests__/data/response_html_success.json', 'utf8'));
    const result = new ScrapeResult(response);
    const spy = jest.spyOn(cheerio, 'load');
    try {
        expect(result.selector('h1').text()).toEqual('Herman Melville - Moby-Dick');
        // make sure calling it twice performs the same
        expect(result.selector('h1').text()).toEqual('Herman Melville - Moby-Dick');
        // cheerio.load is called exactly once - means it's cached
        expect(spy).toHaveBeenCalledTimes(1);
    } finally {
        // restore even when an assertion above throws, so the spy never leaks into other tests
        spy.mockRestore();
    }
});
it('throws ContentTypeError when accessing .selector on JSON data', () => {
const response = JSON.parse(fs.readFileSync('__tests__/data/response_json_success.json', 'utf8'));
Expand Down
21 changes: 20 additions & 1 deletion examples/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
# Scrapfly Typescript SDK Examples

This directory contains commonly used examples for the Scrapfly Typescript SDK.
This directory contains commonly used examples for the Scrapfly TypeScript SDK, which can be used in TypeScript runtimes (Bun, Deno) as well as JavaScript runtimes like Node.js.

You can use `node` to run the `.js` examples:

```
node examples/basic-get.js
```

Or compile `.ts` examples to `.js`:

```
tsc examples/basic-get.ts
node examples/basic-get.js
```

Or run the `.ts` examples directly through TypeScript runtimes like `bun`:

```
bun examples/basic-get.ts
```
13 changes: 13 additions & 0 deletions examples/basic-get.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
Minimal Scrapfly usage: scrape a single url with a GET request
and print the content of the scraped page.
*/
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';

// placeholder - replace with your own Scrapfly API key
const apiKey = 'YOUR SCRAPFLY KEY';
const scrapfly = new ScrapflyClient({ key: apiKey });

// only the target url is set; every other scrape option is left at its default
const scrapeConfig = new ScrapeConfig({ url: 'https://httpbin.dev/html' });
const response = await scrapfly.scrape(scrapeConfig);

console.log(response.result.content); // html content
4 changes: 3 additions & 1 deletion examples/concurrent-scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ note:
the client will automatically set the limit to your maximum
if you set the limit too high expect errors.TooManyConcurrentRequests
*/
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
import { ScrapflyClient, ScrapeConfig, log } from 'scrapfly-sdk';

log.setLevel('DEBUG');

const key = 'YOUR SCRAPFLY KEY';
const client = new ScrapflyClient({ key });
Expand Down
17 changes: 17 additions & 0 deletions examples/get-binary.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
 * Example: saving binary data from a scrapfly response to a local file.
 */
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
import fs from 'fs';

// placeholder - replace with your own Scrapfly API key
const apiKey = 'YOUR SCRAPFLY KEY';
const scrapfly = new ScrapflyClient({ key: apiKey });

// NOTE(review): this targets an HTML product page with javascript rendering enabled,
// yet the content below is decoded as base64 and saved as image.png - confirm that
// the url/options are the intended ones for a binary download example.
const scrapeConfig = new ScrapeConfig({
    url: 'https://web-scraping.dev/product/1',
    render_js: true,
    js: 'return document.title',
});
const response = await scrapfly.scrape(scrapeConfig);

// decode the response content from base64 into bytes and write them to disk:
const bytes = Buffer.from(response.result.content, 'base64');
fs.writeFileSync('image.png', bytes);
Loading

0 comments on commit d39eae8

Please sign in to comment.