Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] retry failing request before giving up #666

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bin/cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ yargs
default: 0,
describe: 'Set timeout between requests. Timeout is in Milliseconds: 1000 mls = 1 s',
},
retry: {
default: 3,
describe: 'Set the amount of times a failing request should be retried before giving up',
},
number: {
alias: 'n',
default: 0,
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"jsdom": "^16.5.3",
"json2csv": "4.5.1",
"ora": "^4.0.2",
"p-retry": "^4.6.1",
"progress": "^2.0.3",
"request": "^2.88.0",
"request-promise": "^4.2.4",
Expand Down
89 changes: 49 additions & 40 deletions src/core/Downloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { fromCallback } from 'bluebird';
import archiver from 'archiver';
import { SocksProxyAgent } from 'socks-proxy-agent';
import { forEachLimit } from 'async';
import pRetry from 'p-retry';

import { MultipleBar } from '../helpers';
import { DownloaderConstructor, PostCollector, DownloadParams, Proxy, Headers } from '../types';
Expand All @@ -23,6 +24,8 @@ export class Downloader {

private proxy: string[] | string;

private retry: number;

public noWaterMark: boolean;

public filepath: string;
Expand All @@ -33,14 +36,15 @@ export class Downloader {

public cookieJar: CookieJar;

constructor({ progress, proxy, noWaterMark, headers, filepath, bulk, cookieJar }: DownloaderConstructor) {
constructor({ progress, proxy, retry, noWaterMark, headers, filepath, bulk, cookieJar }: DownloaderConstructor) {
this.progress = true || progress;
this.progressBar = [];
this.noWaterMark = noWaterMark;
this.headers = headers;
this.filepath = filepath;
this.mbars = new MultipleBar();
this.proxy = proxy;
this.retry = retry;
this.bulk = bulk;
this.cookieJar = cookieJar;
}
Expand Down Expand Up @@ -90,46 +94,51 @@ export class Downloader {
* @param {*} item
*/
public toBuffer(item: PostCollector): Promise<Buffer> {
return new Promise((resolve, reject) => {
const proxy = this.getProxy;
let r = request;
let barIndex;
let buffer = Buffer.from('');
if (proxy.proxy && !proxy.socks) {
r = request.defaults({ proxy: `http://${proxy.proxy}/` });
}
if (proxy.proxy && proxy.socks) {
r = request.defaults({ agent: (proxy.proxy as unknown) as Agent });
}
r.get({
url: item.videoUrlNoWaterMark ? item.videoUrlNoWaterMark : item.videoUrl,
headers: this.headers,
jar: this.cookieJar,
})
.on('response', response => {
const len = parseInt(response.headers['content-length'] as string, 10);
if (this.progress && !this.bulk && len) {
barIndex = this.addBar(!!item.videoUrlNoWaterMark, len);
return pRetry(
() =>
new Promise((resolve, reject) => {
const proxy = this.getProxy;
let r = request;
let barIndex;
let buffer = Buffer.from('');
if (proxy.proxy && !proxy.socks) {
r = request.defaults({ proxy: `http://${proxy.proxy}/` });
}
if (this.progress && !this.bulk && !len) {
console.log(`Empty response! You can try again with a proxy! Can't download video: ${item.id}`);
if (proxy.proxy && proxy.socks) {
r = request.defaults({ agent: (proxy.proxy as unknown) as Agent });
}
})
.on('data', chunk => {
if (chunk.length) {
buffer = Buffer.concat([buffer, chunk as Buffer]);
if (this.progress && !this.bulk && barIndex && barIndex.hasOwnProperty('tick')) {
barIndex.tick(chunk.length, { id: item.id });
}
}
})
.on('end', () => {
resolve(buffer);
})
.on('error', () => {
reject(new Error(`Cant download video: ${item.id}. If you were using proxy, please try without it.`));
});
});

r.get({
url: item.videoUrlNoWaterMark ? item.videoUrlNoWaterMark : item.videoUrl,
headers: this.headers,
jar: this.cookieJar,
})
.on('response', response => {
const len = parseInt(response.headers['content-length'] as string, 10);
if (this.progress && !this.bulk && len) {
barIndex = this.addBar(!!item.videoUrlNoWaterMark, len);
}
if (this.progress && !this.bulk && !len) {
console.log(`Empty response! You can try again with a proxy! Can't download video: ${item.id}`);
}
})
.on('data', chunk => {
if (chunk.length) {
buffer = Buffer.concat([buffer, chunk as Buffer]);
if (this.progress && !this.bulk && barIndex && barIndex.hasOwnProperty('tick')) {
barIndex.tick(chunk.length, { id: item.id });
}
}
})
.on('end', () => {
resolve(buffer);
})
.on('error', () => {
reject(new Error(`Cant download video: ${item.id}. If you were using proxy, please try without it.`));
});
}),
{ retries: this.retry },
);
}

/**
Expand Down Expand Up @@ -207,7 +216,7 @@ export class Downloader {
...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
} as unknown) as OptionsWithUri;

const result = await rp(options);
const result = await pRetry(() => rp(options), { retries: this.retry });

await fromCallback(cb => writeFile(`${this.filepath}/${post.id}.mp4`, result, cb));
}
Expand Down
113 changes: 63 additions & 50 deletions src/core/TikTok.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { EventEmitter } from 'events';
import { SocksProxyAgent } from 'socks-proxy-agent';
import { forEachLimit } from 'async';
import { URLSearchParams } from 'url';
import pRetry from 'p-retry';
import CONST from '../constant';
import { sign, makeid } from '../helpers';

Expand Down Expand Up @@ -94,6 +95,8 @@ export class TikTokScraper extends EventEmitter {

private timeout: number;

private retry: number;

private bulk: boolean;

private validHeaders: boolean;
Expand Down Expand Up @@ -148,6 +151,7 @@ export class TikTokScraper extends EventEmitter {
useTestEndpoints = false,
fileName = '',
timeout = 0,
retry = 3,
bulk = false,
zip = false,
test = false,
Expand Down Expand Up @@ -203,13 +207,15 @@ export class TikTokScraper extends EventEmitter {
this.maxCursor = 0;
this.noDuplicates = [];
this.timeout = timeout;
this.retry = retry;
this.bulk = bulk;
this.validHeaders = false;
this.Downloader = new Downloader({
progress,
cookieJar: this.cookieJar,
proxy,
noWaterMark,
retry,
headers,
filepath: process.env.SCRAPING_FROM_DOCKER ? '/usr/app/files' : filepath || '',
bulk,
Expand Down Expand Up @@ -320,57 +326,64 @@ export class TikTokScraper extends EventEmitter {
bodyOnly = true,
): Promise<T> {
// eslint-disable-next-line no-async-promise-executor
return new Promise(async (resolve, reject) => {
const proxy = this.getProxy;
const options = ({
jar: this.cookieJar,
uri,
method,
...(qs ? { qs } : {}),
...(body ? { body } : {}),
...(form ? { form } : {}),
headers: {
...this.headers,
...headers,
...(this.csrf ? { 'x-secsdk-csrf-token': this.csrf } : {}),
},
...(json ? { json: true } : {}),
...(gzip ? { gzip: true } : {}),
resolveWithFullResponse: true,
followAllRedirects: followAllRedirects || false,
simple,
...(proxy.proxy && proxy.socks ? { agent: proxy.proxy } : {}),
...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
...(this.strictSSL === false ? { rejectUnauthorized: false } : {}),
timeout: 10000,
} as unknown) as OptionsWithUri;

const session = this.sessionList[Math.floor(Math.random() * this.sessionList.length)];
if (session) {
this.cookieJar.setCookie(session, 'https://tiktok.com');
}
/**
* Set tt_webid_v2 cookie to access video url
*/
const cookies = this.cookieJar.getCookieString('https://tiktok.com');
if (cookies.indexOf('tt_webid_v2') === -1) {
this.cookieJar.setCookie(`tt_webid_v2=69${makeid(17)}; Domain=tiktok.com; Path=/; Secure; hostOnly=false`, 'https://tiktok.com');
}
return pRetry(
() =>
new Promise(async (resolve, reject) => {
const proxy = this.getProxy;
const options = ({
jar: this.cookieJar,
uri,
method,
...(qs ? { qs } : {}),
...(body ? { body } : {}),
...(form ? { form } : {}),
headers: {
...this.headers,
...headers,
...(this.csrf ? { 'x-secsdk-csrf-token': this.csrf } : {}),
},
...(json ? { json: true } : {}),
...(gzip ? { gzip: true } : {}),
resolveWithFullResponse: true,
followAllRedirects: followAllRedirects || false,
simple,
...(proxy.proxy && proxy.socks ? { agent: proxy.proxy } : {}),
...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
...(this.strictSSL === false ? { rejectUnauthorized: false } : {}),
timeout: 10000,
} as unknown) as OptionsWithUri;

const session = this.sessionList[Math.floor(Math.random() * this.sessionList.length)];
if (session) {
this.cookieJar.setCookie(session, 'https://tiktok.com');
}
/**
* Set tt_webid_v2 cookie to access video url
*/
const cookies = this.cookieJar.getCookieString('https://tiktok.com');
if (cookies.indexOf('tt_webid_v2') === -1) {
this.cookieJar.setCookie(
`tt_webid_v2=69${makeid(17)}; Domain=tiktok.com; Path=/; Secure; hostOnly=false`,
'https://tiktok.com',
);
}

try {
const response = await rp(options);
// Extract valid csrf token
if (options.method === 'HEAD') {
const csrf = response.headers['x-ware-csrf-token'];
this.csrf = csrf.split(',')[1] as string;
}
setTimeout(() => {
resolve(bodyOnly ? response.body : response);
}, this.timeout);
} catch (error) {
reject(error);
}
});
try {
const response = await rp(options);
// Extract valid csrf token
if (options.method === 'HEAD') {
const csrf = response.headers['x-ware-csrf-token'];
this.csrf = csrf.split(',')[1] as string;
}
setTimeout(() => {
resolve(bodyOnly ? response.body : response);
}, this.timeout);
} catch (error) {
reject(error);
}
}),
{ retries: this.retry },
);
}

private returnInitError(error) {
Expand Down
1 change: 1 addition & 0 deletions src/entry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ const getInitOptions = () => {
noWaterMark: false,
hdVideo: false,
timeout: 0,
retry: 3,
tac: '',
signature: '',
verifyFp: makeVerifyFp(),
Expand Down
1 change: 1 addition & 0 deletions src/types/Downloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export interface DownloaderConstructor {
progress: boolean;
proxy: string[] | string;
noWaterMark: boolean;
retry: number;
headers: Headers;
filepath: string;
bulk: boolean;
Expand Down
2 changes: 2 additions & 0 deletions src/types/TikTok.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export interface Options {
fileName?: string;
historyPath?: string;
timeout?: number;
retry?: number;
hdVideo?: boolean;
randomUa?: boolean;
webHookUrl?: string;
Expand Down Expand Up @@ -75,6 +76,7 @@ export interface TikTokConstructor {
noWaterMark?: boolean;
fileName?: string;
timeout?: number;
retry?: number;
test?: boolean;
hdVideo?: boolean;
signature?: string;
Expand Down