diff --git a/bin/cli.js b/bin/cli.js
index 1a8b88b6..8bcbce14 100644
--- a/bin/cli.js
+++ b/bin/cli.js
@@ -125,6 +125,10 @@ yargs
             default: 0,
             describe: 'Set timeout between requests. Timeout is in Milliseconds: 1000 mls = 1 s',
         },
+        retry: {
+            default: 3,
+            describe: 'Set the amount of times a failing request should be retried before giving up',
+        },
         number: {
             alias: 'n',
             default: 0,
diff --git a/package.json b/package.json
index a70884e5..50cd23cb 100644
--- a/package.json
+++ b/package.json
@@ -18,6 +18,7 @@
         "jsdom": "^16.5.3",
         "json2csv": "4.5.1",
         "ora": "^4.0.2",
+        "p-retry": "^4.6.1",
         "progress": "^2.0.3",
         "request": "^2.88.0",
         "request-promise": "^4.2.4",
diff --git a/src/core/Downloader.ts b/src/core/Downloader.ts
index 9058bbad..c7f3417d 100644
--- a/src/core/Downloader.ts
+++ b/src/core/Downloader.ts
@@ -10,6 +10,7 @@ import { fromCallback } from 'bluebird';
 import archiver from 'archiver';
 import { SocksProxyAgent } from 'socks-proxy-agent';
 import { forEachLimit } from 'async';
+import pRetry from 'p-retry';
 
 import { MultipleBar } from '../helpers';
 import { DownloaderConstructor, PostCollector, DownloadParams, Proxy, Headers } from '../types';
@@ -23,6 +24,8 @@ export class Downloader {
 
     private proxy: string[] | string;
 
+    private retry: number;
+
     public noWaterMark: boolean;
 
     public filepath: string;
@@ -33,7 +36,7 @@ export class Downloader {
 
     public cookieJar: CookieJar;
 
-    constructor({ progress, proxy, noWaterMark, headers, filepath, bulk, cookieJar }: DownloaderConstructor) {
+    constructor({ progress, proxy, retry, noWaterMark, headers, filepath, bulk, cookieJar }: DownloaderConstructor) {
         this.progress = true || progress;
         this.progressBar = [];
         this.noWaterMark = noWaterMark;
@@ -41,6 +44,7 @@ export class Downloader {
         this.filepath = filepath;
         this.mbars = new MultipleBar();
         this.proxy = proxy;
+        this.retry = retry;
         this.bulk = bulk;
         this.cookieJar = cookieJar;
     }
@@ -90,46 +94,51 @@ export class Downloader {
      * @param {*} item
      */
     public toBuffer(item: PostCollector): Promise {
-        return new Promise((resolve, reject) => {
-            const proxy = this.getProxy;
-            let r = request;
-            let barIndex;
-            let buffer = Buffer.from('');
-            if (proxy.proxy && !proxy.socks) {
-                r = request.defaults({ proxy: `http://${proxy.proxy}/` });
-            }
-            if (proxy.proxy && proxy.socks) {
-                r = request.defaults({ agent: (proxy.proxy as unknown) as Agent });
-            }
-            r.get({
-                url: item.videoUrlNoWaterMark ? item.videoUrlNoWaterMark : item.videoUrl,
-                headers: this.headers,
-                jar: this.cookieJar,
-            })
-                .on('response', response => {
-                    const len = parseInt(response.headers['content-length'] as string, 10);
-                    if (this.progress && !this.bulk && len) {
-                        barIndex = this.addBar(!!item.videoUrlNoWaterMark, len);
+        return pRetry(
+            () =>
+                new Promise((resolve, reject) => {
+                    const proxy = this.getProxy;
+                    let r = request;
+                    let barIndex;
+                    let buffer = Buffer.from('');
+                    if (proxy.proxy && !proxy.socks) {
+                        r = request.defaults({ proxy: `http://${proxy.proxy}/` });
                     }
-                    if (this.progress && !this.bulk && !len) {
-                        console.log(`Empty response! You can try again with a proxy! Can't download video: ${item.id}`);
+                    if (proxy.proxy && proxy.socks) {
+                        r = request.defaults({ agent: (proxy.proxy as unknown) as Agent });
                     }
-                })
-                .on('data', chunk => {
-                    if (chunk.length) {
-                        buffer = Buffer.concat([buffer, chunk as Buffer]);
-                        if (this.progress && !this.bulk && barIndex && barIndex.hasOwnProperty('tick')) {
-                            barIndex.tick(chunk.length, { id: item.id });
-                        }
-                    }
-                })
-                .on('end', () => {
-                    resolve(buffer);
-                })
-                .on('error', () => {
-                    reject(new Error(`Cant download video: ${item.id}. If you were using proxy, please try without it.`));
-                });
-        });
+
+                    r.get({
+                        url: item.videoUrlNoWaterMark ? item.videoUrlNoWaterMark : item.videoUrl,
+                        headers: this.headers,
+                        jar: this.cookieJar,
+                    })
+                        .on('response', response => {
+                            const len = parseInt(response.headers['content-length'] as string, 10);
+                            if (this.progress && !this.bulk && len) {
+                                barIndex = this.addBar(!!item.videoUrlNoWaterMark, len);
+                            }
+                            if (this.progress && !this.bulk && !len) {
+                                console.log(`Empty response! You can try again with a proxy! Can't download video: ${item.id}`);
+                            }
+                        })
+                        .on('data', chunk => {
+                            if (chunk.length) {
+                                buffer = Buffer.concat([buffer, chunk as Buffer]);
+                                if (this.progress && !this.bulk && barIndex && barIndex.hasOwnProperty('tick')) {
+                                    barIndex.tick(chunk.length, { id: item.id });
+                                }
+                            }
+                        })
+                        .on('end', () => {
+                            resolve(buffer);
+                        })
+                        .on('error', () => {
+                            reject(new Error(`Cant download video: ${item.id}. If you were using proxy, please try without it.`));
+                        });
+                }),
+            { retries: this.retry },
+        );
     }
 
     /**
@@ -207,7 +216,7 @@ export class Downloader {
             ...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
         } as unknown) as OptionsWithUri;
 
-        const result = await rp(options);
+        const result = await pRetry(() => rp(options), { retries: this.retry });
 
         await fromCallback(cb => writeFile(`${this.filepath}/${post.id}.mp4`, result, cb));
     }
diff --git a/src/core/TikTok.ts b/src/core/TikTok.ts
index 6b67ccca..00c98ed7 100644
--- a/src/core/TikTok.ts
+++ b/src/core/TikTok.ts
@@ -13,6 +13,7 @@ import { EventEmitter } from 'events';
 import { SocksProxyAgent } from 'socks-proxy-agent';
 import { forEachLimit } from 'async';
 import { URLSearchParams } from 'url';
+import pRetry from 'p-retry';
 
 import CONST from '../constant';
 import { sign, makeid } from '../helpers';
@@ -94,6 +95,8 @@ export class TikTokScraper extends EventEmitter {
 
     private timeout: number;
 
+    private retry: number;
+
     private bulk: boolean;
 
     private validHeaders: boolean;
@@ -148,6 +151,7 @@ export class TikTokScraper extends EventEmitter {
         useTestEndpoints = false,
         fileName = '',
         timeout = 0,
+        retry = 3,
         bulk = false,
         zip = false,
         test = false,
@@ -203,6 +207,7 @@ export class TikTokScraper extends EventEmitter {
         this.maxCursor = 0;
         this.noDuplicates = [];
         this.timeout = timeout;
+        this.retry = retry;
         this.bulk = bulk;
         this.validHeaders = false;
         this.Downloader = new Downloader({
@@ -210,6 +215,7 @@ export class TikTokScraper extends EventEmitter {
             cookieJar: this.cookieJar,
             proxy,
             noWaterMark,
+            retry,
             headers,
             filepath: process.env.SCRAPING_FROM_DOCKER ? '/usr/app/files' : filepath || '',
             bulk,
@@ -320,57 +326,57 @@ export class TikTokScraper extends EventEmitter {
         bodyOnly = true,
     ): Promise {
-        // eslint-disable-next-line no-async-promise-executor
-        return new Promise(async (resolve, reject) => {
-            const proxy = this.getProxy;
-            const options = ({
-                jar: this.cookieJar,
-                uri,
-                method,
-                ...(qs ? { qs } : {}),
-                ...(body ? { body } : {}),
-                ...(form ? { form } : {}),
-                headers: {
-                    ...this.headers,
-                    ...headers,
-                    ...(this.csrf ? { 'x-secsdk-csrf-token': this.csrf } : {}),
-                },
-                ...(json ? { json: true } : {}),
-                ...(gzip ? { gzip: true } : {}),
-                resolveWithFullResponse: true,
-                followAllRedirects: followAllRedirects || false,
-                simple,
-                ...(proxy.proxy && proxy.socks ? { agent: proxy.proxy } : {}),
-                ...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
-                ...(this.strictSSL === false ? { rejectUnauthorized: false } : {}),
-                timeout: 10000,
-            } as unknown) as OptionsWithUri;
-
-            const session = this.sessionList[Math.floor(Math.random() * this.sessionList.length)];
-            if (session) {
-                this.cookieJar.setCookie(session, 'https://tiktok.com');
-            }
-            /**
-             * Set tt_webid_v2 cookie to access video url
-             */
-            const cookies = this.cookieJar.getCookieString('https://tiktok.com');
-            if (cookies.indexOf('tt_webid_v2') === -1) {
-                this.cookieJar.setCookie(`tt_webid_v2=69${makeid(17)}; Domain=tiktok.com; Path=/; Secure; hostOnly=false`, 'https://tiktok.com');
-            }
+        // Each attempt is a plain async function, so anything thrown while building the request
+        // (not only a failed `rp` call) rejects the attempt and is handed to p-retry.
+        return pRetry(
+            async () => {
+                const proxy = this.getProxy;
+                const options = ({
+                    jar: this.cookieJar,
+                    uri,
+                    method,
+                    ...(qs ? { qs } : {}),
+                    ...(body ? { body } : {}),
+                    ...(form ? { form } : {}),
+                    headers: {
+                        ...this.headers,
+                        ...headers,
+                        ...(this.csrf ? { 'x-secsdk-csrf-token': this.csrf } : {}),
+                    },
+                    ...(json ? { json: true } : {}),
+                    ...(gzip ? { gzip: true } : {}),
+                    resolveWithFullResponse: true,
+                    followAllRedirects: followAllRedirects || false,
+                    simple,
+                    ...(proxy.proxy && proxy.socks ? { agent: proxy.proxy } : {}),
+                    ...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
+                    ...(this.strictSSL === false ? { rejectUnauthorized: false } : {}),
+                    timeout: 10000,
+                } as unknown) as OptionsWithUri;
+
+                const session = this.sessionList[Math.floor(Math.random() * this.sessionList.length)];
+                if (session) {
+                    this.cookieJar.setCookie(session, 'https://tiktok.com');
+                }
+                /**
+                 * Set tt_webid_v2 cookie to access video url
+                 */
+                const cookies = this.cookieJar.getCookieString('https://tiktok.com');
+                if (cookies.indexOf('tt_webid_v2') === -1) {
+                    this.cookieJar.setCookie(`tt_webid_v2=69${makeid(17)}; Domain=tiktok.com; Path=/; Secure; hostOnly=false`, 'https://tiktok.com');
+                }
 
-            try {
-                const response = await rp(options);
-                // Extract valid csrf token
-                if (options.method === 'HEAD') {
-                    const csrf = response.headers['x-ware-csrf-token'];
-                    this.csrf = csrf.split(',')[1] as string;
-                }
-                setTimeout(() => {
-                    resolve(bodyOnly ? response.body : response);
-                }, this.timeout);
-            } catch (error) {
-                reject(error);
-            }
-        });
+                const response = await rp(options);
+                // Extract valid csrf token
+                if (options.method === 'HEAD') {
+                    const csrf = response.headers['x-ware-csrf-token'];
+                    this.csrf = csrf.split(',')[1] as string;
+                }
+                // Wait the configured delay (this.timeout ms) before handing the result back
+                await new Promise(resolve => setTimeout(resolve, this.timeout));
+                return bodyOnly ? response.body : response;
+            },
+            { retries: this.retry },
+        );
     }
 
     private returnInitError(error) {
diff --git a/src/entry.ts b/src/entry.ts
index 77401ada..1698c9d6 100644
--- a/src/entry.ts
+++ b/src/entry.ts
@@ -39,6 +39,7 @@ const getInitOptions = () => {
         noWaterMark: false,
         hdVideo: false,
         timeout: 0,
+        retry: 3,
         tac: '',
         signature: '',
         verifyFp: makeVerifyFp(),
diff --git a/src/types/Downloader.ts b/src/types/Downloader.ts
index 7e397111..7b97f2fa 100644
--- a/src/types/Downloader.ts
+++ b/src/types/Downloader.ts
@@ -5,6 +5,7 @@ export interface DownloaderConstructor {
     progress: boolean;
     proxy: string[] | string;
     noWaterMark: boolean;
+    retry: number;
     headers: Headers;
     filepath: string;
     bulk: boolean;
diff --git a/src/types/TikTok.ts b/src/types/TikTok.ts
index 6b315cbc..d751cdcc 100644
--- a/src/types/TikTok.ts
+++ b/src/types/TikTok.ts
@@ -44,6 +44,7 @@ export interface Options {
     fileName?: string;
     historyPath?: string;
     timeout?: number;
+    retry?: number;
     hdVideo?: boolean;
     randomUa?: boolean;
     webHookUrl?: string;
@@ -75,6 +76,7 @@ export interface TikTokConstructor {
     noWaterMark?: boolean;
     fileName?: string;
     timeout?: number;
+    retry?: number;
     test?: boolean;
     hdVideo?: boolean;
     signature?: string;