diff --git a/README.md b/README.md index dd858f1b..a81a500b 100644 --- a/README.md +++ b/README.md @@ -115,14 +115,14 @@ $ tiktok-scraper --help Usage: tiktok-scraper [options] Commands: - tiktok-scraper user [id] Scrape videos from username. Enter only username - tiktok-scraper hashtag [id] Scrape videos from hashtag. Enter hashtag without # - tiktok-scraper trend Scrape posts from current trends - tiktok-scraper music [id] Scrape posts from a music id number - tiktok-scraper video [id] Download single video without the watermark - tiktok-scraper history View previous download history - tiktok-scraper from-file [file] [async] Scrape users, hashtags, music, videos mentioned - in a file. 1 value per 1 line + tiktok-scraper user [id] Scrape videos from the User Feed. Enter only the username + tiktok-scraper hashtag [id] Scrape videos from the Hashtag Feed. Enter hashtag without the # + tiktok-scraper trend Scrape posts from the Trend Feed + tiktok-scraper music [id] Scrape videos from the Music Feed. Enter only the music id + tiktok-scraper video [id] Extract metadata from a single video without the watermark. To download use -d flag + tiktok-scraper history View previous download history + tiktok-scraper from-file [file] [async] Scrape users, hashtags, music, videos mentioned in a file. One value per one line + tiktok-scraper userprofile [id] Show user metadata Options: --version Show version number [boolean] @@ -134,22 +134,22 @@ Options: 1000 mls = 1 s [default: 0] --number, -n Number of posts to scrape. If you will set 0 then all posts will be scraped [default: 0] - --since Scrape no posts published before this date (timestamp). - If set to 0 the filter is deactived [default: 0] + --since Scrape posts that are published after specified date + (timestamp). The default value is 0 - scrape all posts [default: 0] --proxy, -p Set single proxy [default: ""] --proxy-file Use proxies from a file. Scraper will use random proxies - from the file per each request. 
1 line 1 proxy. - [default: ""] + from the file per each request. 1 line 1 proxy. [default: ""] --download, -d Download video posts to the folder with the name input [id] [boolean] [default: false] + --useTestEndpoints Use Tiktok test endpoints. When your requests are blocked + by captcha you can try to use Tiktok test endpoints. [boolean] [default: false] --asyncDownload, -a Number of concurrent downloads [default: 5] --hd Download video in HD. Video size will be x5-x10 times larger and this will affect scraper execution speed. This - option only works in combination with -w flag - [boolean] [default: false] + option only works in combination with -w flag [boolean] [default: false] --zip, -z ZIP all downloaded video posts [boolean] [default: false] --filepath File path to save all output files. - [default: "/Users/karl.wint/Documents/projects/javascript/tiktok-scraper"] + [default: "$(pwd)/tiktok-scraper"] --filetype, -t Type of the output file where post information will be saved. 'all' - save information about all posts to the` 'json' and 'csv' @@ -163,15 +163,16 @@ Options: folder and in the future usage will only download new videos avoiding duplicates [boolean] [default: false] --historypath Set custom path where history file/files will be stored - [default: "/var/folders/d5/fyh1_f2926q7c65g7skc0qh80000gn/T"] + [default: ] + --throttlelimit Set custom maximum number of calls to TikTok within an + interval. + --throttleinterval Set custom timespan for throttle-interval in milliseconds --remove, -r Delete the history record by entering "TYPE:INPUT" or - "all" to clean all the history. For example: user:bob - [default: ""] + "all" to clean all the history. For example: user:bob [default: ""] --webHookUrl Set webhook url to receive scraper result as HTTP requests. 
For example to your own API [default: ""] - --method Receive data to your webhook url as POST or GET request - [choices: "GET", "POST"] [default: "POST"] - --help Show help [boolean] + --method Receive data to your webhook url as POST or GET request [choices: "GET", "POST"] [default: "POST"] + --help Show help [boolean] Examples: tiktok-scraper user USERNAME -d -n 100 --session sid_tt=dae32131231 diff --git a/bin/cli.js b/bin/cli.js index 1a8b88b6..23cedea3 100644 --- a/bin/cli.js +++ b/bin/cli.js @@ -27,6 +27,10 @@ const startScraper = async argv => { if (argv.historypath) { argv.historyPath = argv.historypath; } + if (argv.throttleinterval || argv.throttlelimit) { + argv.throttleLimit = argv.throttlelimit; + argv.throttleInterval = argv.throttleinterval; + } if (argv.file) { argv.input = argv.file; } @@ -205,6 +209,12 @@ yargs default: process.env.SCRAPING_FROM_DOCKER ? '' : tmpdir(), describe: 'Set custom path where history file/files will be stored', }, + throttlelimit: { + describe: 'Set custom maximum number of calls to TikTok within an interval.', + }, + throttleinterval: { + describe: 'Set custom timespan for throttle-interval in milliseconds', + }, remove: { alias: ['r'], default: '', diff --git a/package.json b/package.json index a70884e5..bae82802 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "jsdom": "^16.5.3", "json2csv": "4.5.1", "ora": "^4.0.2", + "p-throttle": "^4.1.1", "progress": "^2.0.3", "request": "^2.88.0", "request-promise": "^4.2.4", diff --git a/src/core/TikTok.ts b/src/core/TikTok.ts index 6b67ccca..766f3c48 100644 --- a/src/core/TikTok.ts +++ b/src/core/TikTok.ts @@ -13,6 +13,7 @@ import { EventEmitter } from 'events'; import { SocksProxyAgent } from 'socks-proxy-agent'; import { forEachLimit } from 'async'; import { URLSearchParams } from 'url'; +import pThrottle from 'p-throttle'; import CONST from '../constant'; import { sign, makeid } from '../helpers'; @@ -125,6 +126,8 @@ export class TikTokScraper extends 
EventEmitter { private store: string[]; + private throttle: ReturnType | false; + public cookieJar: CookieJar; constructor({ @@ -157,6 +160,8 @@ export class TikTokScraper extends EventEmitter { headers, verifyFp = '', sessionList = [], + throttleLimit, + throttleInterval, }: TikTokConstructor) { super(); this.userIdStore = ''; @@ -221,6 +226,16 @@ export class TikTokScraper extends EventEmitter { bad: 0, }; this.store = []; + this.throttle = + !!(throttleLimit && throttleInterval) && + pThrottle({ + limit: throttleLimit, + interval: throttleInterval, + }); + + if (this.throttle) { + this.request = this.throttle(this.request); + } } /** diff --git a/src/types/TikTok.ts b/src/types/TikTok.ts index 6b315cbc..8f19f05f 100644 --- a/src/types/TikTok.ts +++ b/src/types/TikTok.ts @@ -43,6 +43,8 @@ export interface Options { remove?: string; fileName?: string; historyPath?: string; + throttleLimit?: number; + throttleInterval?: number; timeout?: number; hdVideo?: boolean; randomUa?: boolean; @@ -83,6 +85,8 @@ export interface TikTokConstructor { headers: Headers; verifyFp?: string; sessionList?: string[]; + throttleLimit?: number; + throttleInterval?: number; } export interface Hashtags {