diff --git a/README.md b/README.md
index dd858f1b..a4dcb12b 100644
--- a/README.md
+++ b/README.md
@@ -171,6 +171,8 @@ Options:
                        requests. For example to your own API       [default: ""]
   --method             Receive data to your webhook url as POST or GET request
                                       [choices: "GET", "POST"] [default: "POST"]
+  --includeComments    Also save all comments when downloading a video. Needs a
+                       valid session to work.         [boolean] [default: false]
   --help               Show help                                       [boolean]
 
 Examples:
diff --git a/bin/cli.js b/bin/cli.js
index 6af1c690..526c1532 100644
--- a/bin/cli.js
+++ b/bin/cli.js
@@ -219,6 +219,11 @@ yargs
             choices: ['GET', 'POST'],
             describe: 'Receive data to your webhook url as POST or GET request',
         },
+        includeComments: {
+            boolean: true,
+            default: false,
+            describe: 'Also save all comments when downloading a video. Needs a valid session to work.',
+        },
     })
     .check(argv => {
         if (CONST.scrape.indexOf(argv._[0]) === -1) {
diff --git a/src/core/TikTok.ts b/src/core/TikTok.ts
index 150ddade..fbf7072e 100644
--- a/src/core/TikTok.ts
+++ b/src/core/TikTok.ts
@@ -33,6 +33,7 @@ import {
     Headers,
     WebHtmlUserMetadata,
     VideoMetadata,
+    CommentsData,
 } from '../types';
 
 import { Downloader } from '../core';
@@ -127,6 +128,8 @@ export class TikTokScraper extends EventEmitter {
 
     public cookieJar: CookieJar;
 
+    private includeComments: boolean;
+
     constructor({
         download,
         filepath,
@@ -157,6 +160,7 @@ export class TikTokScraper extends EventEmitter {
         headers,
         verifyFp = '',
         sessionList = [],
+        includeComments = false,
     }: TikTokConstructor) {
         super();
         this.userIdStore = '';
@@ -221,6 +225,7 @@ export class TikTokScraper extends EventEmitter {
             bad: 0,
         };
         this.store = [];
+        this.includeComments = includeComments;
     }
 
     /**
@@ -868,6 +873,7 @@ export class TikTokScraper extends EventEmitter {
                           name,
                       }))
                     : [],
+                comments: [],
             };
 
             if (this.event) {
@@ -1259,6 +1265,27 @@ export class TikTokScraper extends EventEmitter {
             videoData = await this.getVideoMetadata();
         }
 
+        // get *all* comments of a video (paginated, best effort: errors end the scraping silently)
+        let commentData: CommentsData | undefined;
+        if (this.includeComments) {
+            try {
+                for (let paginationStepSize = 30, currentPage = 0; currentPage < videoData.stats.commentCount; currentPage += paginationStepSize) {
+                    const data = await this.getCommentMetadata('', currentPage, paginationStepSize);
+                    // no data could be retrieved: possibly no valid session; skip comment scraping
+                    if (data === undefined) {
+                        break;
+                    }
+                    if (commentData === undefined) {
+                        commentData = data;
+                    } else if (data.comments !== null) {
+                        commentData.comments = (commentData.comments ?? []).concat(data.comments);
+                    }
+                }
+            } catch {
+                // continue regardless of error
+            }
+        }
+
         const videoItem = {
             id: videoData.id,
             secretID: videoData.video.id,
@@ -1326,6 +1353,7 @@ export class TikTokScraper extends EventEmitter {
                       name,
                   }))
                 : [],
+            comments: commentData?.comments ?? [],
         } as PostCollector;
 
         try {
@@ -1374,4 +1402,67 @@ export class TikTokScraper extends EventEmitter {
             );
         });
     }
+
+    /**
+     * Get one page of comment metadata from the API endpoint
+     * (only works with a valid session: the cookie jar must contain a `sid_tt` cookie!)
+     *
+     * @param url video url; `this.input` is used when it is empty
+     * @param _cursor value sent as the `cursor` query parameter (the caller advances it by the page size)
+     * @param _count value sent as the `count` query parameter
+     * @returns the API response when its `status_code` is 0
+     * @throws Error when no session cookie is set, when the request fails with status code 404,
+     * or when no comment metadata could be extracted
+     */
+    private async getCommentMetadata(url = '', _cursor = 0, _count = 30): Promise<CommentsData> {
+        // abort, if no session is set
+        if (this.cookieJar.getCookieString('https://tiktok.com').indexOf('sid_tt') === -1) {
+            throw Error(`No valid session given. Can't download comments.`);
+        }
+
+        // get username and videoId from url/parameter
+        const videoData = /tiktok.com\/(@[\w.-]+)\/video\/(\d+)/.exec(url || this.input);
+        if (videoData) {
+            // const videoUsername = videoData[1];
+            const videoId = videoData[2];
+
+            // prepare api call
+            const query = {
+                method: 'GET',
+                uri: `https://www.tiktok.com/api/comment/list/`,
+                json: true,
+                followAllRedirects: true,
+                headers: {
+                    // referer: this.input ? this.input : `https://www.tiktok.com/@${videoUsername}/video/${videoId}`,
+                    cookie: this.cookieJar.getCookieString(`https://tiktok.com/`),
+                },
+                qs: {
+                    aweme_id: videoId,
+                    aid: 1988,
+                    history_len: 6,
+                    cursor: _cursor,
+                    count: _count,
+                },
+            };
+
+            // generate signature and add it to query
+            const unsignedURL = `${query.uri}?${new URLSearchParams(query.qs as any).toString()}`;
+            const _signature = sign(unsignedURL, this.headers['user-agent']);
+            // @ts-ignore
+            query.qs._signature = _signature;
+
+            // call api
+            try {
+                const response = await this.request(query);
+                if (response.status_code === 0) {
+                    return response;
+                }
+            } catch (err) {
+                if (err.statusCode === 404) {
+                    throw new Error(err.message);
+                }
+            }
+        }
+        throw new Error(`Can't extract comment metadata of ${this.input}`);
+    }
 }
diff --git a/src/types/TikTok.ts b/src/types/TikTok.ts
index 6b315cbc..0a013fd8 100644
--- a/src/types/TikTok.ts
+++ b/src/types/TikTok.ts
@@ -83,6 +83,7 @@ export interface TikTokConstructor {
     headers: Headers;
     verifyFp?: string;
     sessionList?: string[];
+    includeComments?: boolean;
 }
 
 export interface Hashtags {
@@ -161,6 +162,60 @@ export interface PostCollector {
     repeated?: boolean;
     downloaded: boolean;
     effectStickers: EffectStickers[];
+    comments: Comment[];
+}
+
+/** A single video comment as stored in the `comments` list of a `PostCollector`. */
+export interface Comment {
+    aweme_id: string;
+    cid: string;
+    create_time: number;
+    digg_count: number;
+    status: number;
+    text: string;
+    author_pin: boolean;
+    collect_stat: number;
+    is_author_digged: boolean;
+    no_show: boolean;
+    reply_comment_total: number;
+    reply_id: string;
+    reply_to_reply_id: string;
+    stick_position: number;
+    text_extra: unknown[];
+    user_buried: boolean;
+    user_digged: number;
+    // label_list: null;
+    // reply_comment: null;
+    user: {
+        avatar_thumb: {
+            uri: string;
+            url_list: string[];
+        };
+        custom_verify: string;
+        enterprise_verify_reason: string;
+        nickname: string;
+        sec_uid: string;
+        unique_id: string;
+        uid: string;
+        // ad_cover_url: null;
+        // bold_fields: null;
+        // can_set_geofencing: null;
+        // cha_list: null;
+        // cover_url: null;
+        // events: null;
+        // followers_detail: null;
+        // geofencing: null;
+        // homepage_bottom_toast: null;
+        // item_list: null;
+        // mutual_relation_avatars: null;
+        // need_points: null;
+        // platform_sync_info: null;
+        // relative_users: null;
+        // search_highlight: null;
+        // type_label: null;
+        // user_tags: null;
+        // white_cover_url: null;
+    };
 }
 
 export interface Result {
diff --git a/src/types/TikTokApi.ts b/src/types/TikTokApi.ts
index bc875d45..67b15a74 100644
--- a/src/types/TikTokApi.ts
+++ b/src/types/TikTokApi.ts
@@ -373,3 +373,70 @@ export interface WebHtmlUserMetadata {
         };
     };
 }
+
+/** One page of the `/api/comment/list/` response; `status_code` 0 marks a successful call. */
+export interface CommentsData {
+    status_code: number;
+    status_message: string;
+    comments: CommentMetadata[];
+    cursor: string;
+    hasMore: boolean;
+    reply_style: number;
+    total: number;
+    // log_pb:
+    // top_gifts:
+    alias_comment_deleted: boolean;
+}
+
+/** A single entry of the `comments` list of a `CommentsData` response. */
+export interface CommentMetadata {
+    aweme_id: string;
+    cid: string;
+    create_time: number;
+    digg_count: number;
+    status: number;
+    text: string;
+    author_pin: boolean;
+    collect_stat: number;
+    is_author_digged: boolean;
+    no_show: boolean;
+    reply_comment_total: number;
+    reply_id: string;
+    reply_to_reply_id: string;
+    stick_position: number;
+    text_extra: unknown[];
+    user_buried: boolean;
+    user_digged: number;
+    // label_list: null;
+    // reply_comment: null;
+    user: {
+        avatar_thumb: {
+            uri: string;
+            url_list: string[];
+        };
+        custom_verify: string;
+        enterprise_verify_reason: string;
+        nickname: string;
+        sec_uid: string;
+        unique_id: string;
+        uid: string;
+        // ad_cover_url: null;
+        // bold_fields: null;
+        // can_set_geofencing: null;
+        // cha_list: null;
+        // cover_url: null;
+        // events: null;
+        // followers_detail: null;
+        // geofencing: null;
+        // homepage_bottom_toast: null;
+        // item_list: null;
+        // mutual_relation_avatars: null;
+        // need_points: null;
+        // platform_sync_info: null;
+        // relative_users: null;
+        // search_highlight: null;
+        // type_label: null;
+        // user_tags: null;
+        // white_cover_url: null;
+    };
+}