From 360aed810ad85db950df586282d256516c98cd2d Mon Sep 17 00:00:00 2001
From: N/Ame <173015200+grqz@users.noreply.github.com>
Date: Mon, 2 Dec 2024 03:16:50 +1300
Subject: [PATCH 1/7] [ie/instagram] Support `share` URLs (#11677)

Closes #11630
Authored by: grqz
---
 yt_dlp/extractor/instagram.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index dee8cb85d529..55086d0b29c7 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -254,7 +254,7 @@ def _real_extract(self, url):
 
 
 class InstagramIE(InstagramBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/(?!share/)[^/?#]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
     _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
     _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',

From cd0f934604587ed793e9177f6a127e5dcf99a7dd Mon Sep 17 00:00:00 2001
From: DarkZeros
Date: Sun, 1 Dec 2024 14:21:57 +0000
Subject: [PATCH 2/7] [ie/mitele] Fix extractor (#11683)

Closes #11690
Authored by: DarkZeros
---
 yt_dlp/extractor/mitele.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py
index 3573a2a3fd72..76fef337a2ea 100644
--- a/yt_dlp/extractor/mitele.py
+++ b/yt_dlp/extractor/mitele.py
@@ -80,9 +80,9 @@ class MiTeleIE(TelecincoBaseIE):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        pre_player = self._parse_json(self._search_regex(
-            r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
-            webpage, 'Pre Player'), display_id)['prePlayer']
+        pre_player = self._search_json(
+            r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
+            webpage, 'Pre Player', display_id)['prePlayer']
         title = pre_player['title']
         video_info = self._parse_content(pre_player['video'], url)
         content = pre_player.get('content') or {}

From 0d146c1e36f467af30e87b7af651bdee67b73500 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sun, 1 Dec 2024 15:25:09 +0000
Subject: [PATCH 3/7] [ie/youtube] Adjust player clients for site changes (#11663)

Closes #11640
Authored by: bashonly
---
 README.md                   |  4 ++--
 yt_dlp/extractor/youtube.py | 46 +++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 0a62d8e74c90..772395d24dfb 100644
--- a/README.md
+++ b/README.md
@@ -1761,7 +1761,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
 
 # EXTRACTOR ARGUMENTS
 
-Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"`
+Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=tv,mweb;formats=incomplete" --extractor-args "funimation:version=uncut"`
 
 Note: In CLI, `ARG` can use `-` instead of `_`; e.g.
`youtube:player-client"` becomes `youtube:player_client"` @@ -1770,7 +1770,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, or `web_creator,mweb` is used when authenticating with cookies. The `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. Not all clients support authentication via cookies. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. 
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7a9133466d9b..a67f09e623cf 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -83,6 +83,7 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'REQUIRE_PO_TOKEN': True, + 'SUPPORTS_COOKIES': True, }, # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats 'web_safari': { @@ -95,6 +96,7 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'REQUIRE_PO_TOKEN': True, + 'SUPPORTS_COOKIES': True, }, 'web_embedded': { 'INNERTUBE_CONTEXT': { @@ -104,6 +106,7 @@ }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, + 'SUPPORTS_COOKIES': True, }, 'web_music': { 'INNERTUBE_HOST': 'music.youtube.com', @@ -114,6 +117,7 @@ }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'web_creator': { @@ -125,6 +129,7 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, 'android': { 'INNERTUBE_CONTEXT': { @@ -157,6 +162,7 @@ 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'android_creator': { @@ -191,6 +197,7 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, 'REQUIRE_JS_PLAYER': False, + 'SUPPORTS_COOKIES': True, }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 @@ -225,6 +232,7 @@ 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'REQUIRE_JS_PLAYER': False, 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'ios_creator': { @@ -253,6 +261,7 @@ }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, + 'SUPPORTS_COOKIES': True, }, 'tv': { 'INNERTUBE_CONTEXT': { @@ -262,6 +271,7 @@ }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video # It was previously an age-gate workaround for videos that were `playable_in_embed` @@ -275,19 +285,7 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, 'REQUIRE_AUTH': True, - }, - # This client now requires sign-in for every video - # It may be able to receive pre-merged video+audio 720p/1080p streams - 'mediaconnect': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'MEDIA_CONNECT_FRONTEND', - 'clientVersion': '0.1', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, - 'REQUIRE_JS_PLAYER': False, - 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, } @@ -317,6 +315,7 @@ def build_innertube_clients(): ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg.setdefault('REQUIRE_PO_TOKEN', False) ytcfg.setdefault('REQUIRE_AUTH', False) + ytcfg.setdefault('SUPPORTS_COOKIES', False) ytcfg.setdefault('PLAYER_PARAMS', None) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') @@ -1357,6 +1356,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _DEFAULT_CLIENTS = ('ios', 'mweb') + _DEFAULT_AUTHED_CLIENTS = ('web_creator', 'mweb') _GEO_BYPASS = False @@ -3823,12 +3823,13 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, def _get_requested_clients(self, url, smuggled_data): requested_clients = [] excluded_clients = [] + default_clients = self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated else self._DEFAULT_CLIENTS allowed_clients = sorted( (client for client in 
INNERTUBE_CLIENTS if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client == 'default': - requested_clients.extend(self._DEFAULT_CLIENTS) + requested_clients.extend(default_clients) elif client == 'all': requested_clients.extend(allowed_clients) elif client.startswith('-'): @@ -3838,7 +3839,7 @@ def _get_requested_clients(self, url, smuggled_data): else: requested_clients.append(client) if not requested_clients: - requested_clients.extend(self._DEFAULT_CLIENTS) + requested_clients.extend(default_clients) for excluded_client in excluded_clients: if excluded_client in requested_clients: requested_clients.remove(excluded_client) @@ -3850,9 +3851,18 @@ def _get_requested_clients(self, url, smuggled_data): _, base_client, variant = _split_innertube_client(requested_client) music_client = f'{base_client}_music' if base_client != 'mweb' else 'web_music' if variant != 'music' and music_client in INNERTUBE_CLIENTS: - if not INNERTUBE_CLIENTS[music_client]['REQUIRE_AUTH'] or self.is_authenticated: + client_info = INNERTUBE_CLIENTS[music_client] + if not client_info['REQUIRE_AUTH'] or (self.is_authenticated and client_info['SUPPORTS_COOKIES']): requested_clients.append(music_client) + if self.is_authenticated: + unsupported_clients = [ + client for client in requested_clients if not INNERTUBE_CLIENTS[client]['SUPPORTS_COOKIES'] + ] + for client in unsupported_clients: + self.report_warning(f'Skipping client "{client}" since it does not support cookies', only_once=True) + requested_clients.remove(client) + return orderedSet(requested_clients) def _invalid_player_response(self, pr, video_id): @@ -3958,6 +3968,7 @@ def append_client(*client_names): else: prs.append(pr) + ''' This code is pointless while web_creator is in _DEFAULT_AUTHED_CLIENTS # EU countries require age-verification for accounts to access age-restricted videos # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients if self.is_authenticated and self._is_agegated(pr): @@ -3965,9 +3976,10 @@ def append_client(*client_names): f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) # web_creator can work around the age-verification requirement - # android_vr and mediaconnect may also be able to work around age-verification + # android_vr may also be able to work around age-verification # tv_embedded may(?) 
still work around age-verification if the video is embeddable append_client('web_creator') + ''' prs.extend(deprioritized_prs) From 239f5f36fe04603bec59c8b975f6a792f10246db Mon Sep 17 00:00:00 2001 From: N/Ame <173015200+grqz@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:55:18 +1300 Subject: [PATCH 4/7] [ie/bilibili] Fix extractor (#11667) Closes #11665 Authored by: grqz --- yt_dlp/extractor/bilibili.py | 43 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 02ea67707fcd..f01befcc0b6f 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -18,7 +18,6 @@ InAdvancePagedList, OnDemandPagedList, bool_or_none, - clean_html, determine_ext, filter_dict, float_or_none, @@ -639,31 +638,27 @@ def _real_extract(self, url): headers['Referer'] = url initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + + if traverse_obj(initial_state, ('error', 'trueCode')) == -403: + self.raise_login_required() + if traverse_obj(initial_state, ('error', 'trueCode')) == -404: + raise ExtractorError( + 'This video may be deleted or geo-restricted. ' + 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) + is_festival = 'videoData' not in initial_state if is_festival: video_data = initial_state['videoInfo'] else: - play_info_obj = self._search_json( - r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False) - if not play_info_obj: - if traverse_obj(initial_state, ('error', 'trueCode')) == -403: - self.raise_login_required() - if traverse_obj(initial_state, ('error', 'trueCode')) == -404: - raise ExtractorError( - 'This video may be deleted or geo-restricted. ' - 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) - play_info = traverse_obj(play_info_obj, ('data', {dict})) - if not play_info: - if traverse_obj(play_info_obj, 'code') == 87007: - toast = get_element_by_class('tips-toast', webpage) or '' - msg = clean_html( - f'{get_element_by_class("belongs-to", toast) or ""},' - + (get_element_by_class('level', toast) or '')) - raise ExtractorError( - f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True) - raise ExtractorError('Failed to extract play info') video_data = initial_state['videoData'] + if video_data.get('is_upower_exclusive'): + high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {} + raise ExtractorError( + 'This is a supporter-only video: ' + f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. ' + f'{self._login_hint()}', expected=True) + video_id, title = video_data['bvid'], video_data.get('title') # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. 
@@ -689,10 +684,14 @@ def _real_extract(self, url):
         old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
         cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
 
+        play_info = (
+            traverse_obj(
+                self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
+                ('data', {dict}))
+            or self._download_playinfo(video_id, cid, headers=headers))
+
         festival_info = {}
         if is_festival:
-            play_info = self._download_playinfo(video_id, cid, headers=headers)
-
             festival_info = traverse_obj(initial_state, {
                 'uploader': ('videoInfo', 'upName'),
                 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),

From 62cba8a1bedbfc0ddde7267ae57b72bf5f7ea7b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elan=20Ruusam=C3=A4e?=
Date: Mon, 2 Dec 2024 00:33:11 +0200
Subject: [PATCH 5/7] [ie/duoplay] Fix extractor (#11588)

Authored by: glensc, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
---
 yt_dlp/extractor/duoplay.py | 60 +++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/yt_dlp/extractor/duoplay.py b/yt_dlp/extractor/duoplay.py
index 18642fea3929..75650c3a641e 100644
--- a/yt_dlp/extractor/duoplay.py
+++ b/yt_dlp/extractor/duoplay.py
@@ -5,15 +5,16 @@
     get_element_text_and_html_by_tag,
     int_or_none,
     join_nonempty,
+    parse_qs,
     str_or_none,
     try_call,
     unified_timestamp,
 )
-from ..utils.traversal import traverse_obj
+from ..utils.traversal import traverse_obj, value
 
 
 class DuoplayIE(InfoExtractor):
-    _VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
+    _VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)(?:[/?#]|$)'
     _TESTS = [{
         'note': 'Siberi võmm S02E12',
         'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
@@ -34,15 +35,16 @@ class DuoplayIE(InfoExtractor):
             'episode_number': 12,
             'episode_id': '24',
         },
+        'skip': 'No video found',
     }, {
         'note': 'Empty title',
         'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
-        'md5': '6aca68be71112314738dd17cced7f8bf',
+        'md5': 'cba9f5dabf2582b224d80ac44fb80e47',
         'info_dict': {
             'id': '17_14',
             'ext': 'mp4',
-            'title': 'Ühikarotid',
-            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
+            'title': 'Episode 14',
+            'thumbnail': r're:https?://.+\.jpg',
             'description': 'md5:4719b418e058c209def41d48b601276e',
             'upload_date': '20100916',
             'timestamp': 1284661800,
@@ -52,6 +54,8 @@ class DuoplayIE(InfoExtractor):
             'season_number': 2,
             'episode_id': '14',
             'release_year': 2010,
+            'episode': 'Episode 14',
+            'episode_number': 14,
         },
     }, {
         'note': 'Movie without expiry',
@@ -68,10 +72,32 @@ class DuoplayIE(InfoExtractor):
             'timestamp': 1671054000,
             'release_year': 2018,
         },
+        'skip': 'No video found',
+    }, {
+        'note': 'Episode url without show name',
+        'url': 'https://duoplay.ee/9644?ep=185',
+        'md5': '63f324b4fe2dbd8194dca16a6d52184a',
+        'info_dict': {
+            'id': '9644_185',
+            'ext': 'mp4',
+            'title': 'Episode 185',
+            'thumbnail': r're:https?://.+\.jpg',
+            'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467',
+            'upload_date': '20241120',
+            'timestamp': 1732077000,
+            'episode': 'Episode 63',
+            'episode_id': '185',
+            'episode_number': 63,
+            'season': 'Season 2',
+            'season_number': 2,
+            'series': 'Telehommik',
+            'series_id': '9644',
+        },
     }]
 
     def _real_extract(self, url):
-        telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
+        telecast_id = self._match_id(url)
+        episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none}))
         video_id = join_nonempty(telecast_id, episode, delim='_')
         webpage = self._download_webpage(url,
video_id) video_player = try_call(lambda: extract_attributes( @@ -79,25 +105,33 @@ def _real_extract(self, url): if not video_player or not video_player.get('manifest-url'): raise ExtractorError('No video found', expected=True) + manifest_url = video_player['manifest-url'] + session_token = self._download_json( + 'https://sts.postimees.ee/session/register', video_id, 'Registering session', + 'Unable to register session', headers={ + 'Accept': 'application/json', + 'X-Original-URI': manifest_url, + })['session'] + episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {} return { 'id': video_id, - 'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'), + 'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}), **traverse_obj(episode_attr, { - 'title': 'title', - 'description': 'synopsis', + 'title': ('title', {str}), + 'description': ('synopsis', {str}), 'thumbnail': ('images', 'original'), 'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}), - 'cast': ('cast', {lambda x: x.split(', ')}), + 'cast': ('cast', filter, {lambda x: x.split(', ')}), 'release_year': ('year', {int_or_none}), }), **(traverse_obj(episode_attr, { - 'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))), - 'series': 'title', + 'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})), + 'series': ('title', {str}), 'series_id': ('telecast_id', {str_or_none}), 'season_number': ('season_id', {int_or_none}), - 'episode': 'subtitle', + 'episode': ('subtitle', {str}, filter), 'episode_number': ('episode_nr', {int_or_none}), 'episode_id': ('episode_id', {str_or_none}), }, get_all=False) if episode_attr.get('category') != 'movies' else {}), From 2bea7936323ca4b6f3b9b1fdd892566223e30efa Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 2 Dec 2024 16:22:16 +0100 Subject: [PATCH 6/7] [ie/MicrosoftEmbed] Make format extraction non fatal (#11654) Authored by: seproDev --- yt_dlp/extractor/microsoftembed.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/microsoftembed.py b/yt_dlp/extractor/microsoftembed.py index d0135f5a9c61..2575d6c5e4ce 100644 --- a/yt_dlp/extractor/microsoftembed.py +++ b/yt_dlp/extractor/microsoftembed.py @@ -26,6 +26,7 @@ class MicrosoftEmbedIE(InfoExtractor): 'timestamp': 1631658316, 'upload_date': '20210914', }, + 'expected_warnings': ['Failed to parse XML: syntax error: line 1, column 0'], }] _API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/' @@ -36,11 +37,11 @@ def _real_extract(self, url): formats = [] for source_type, source in metadata['streams'].items(): if source_type == 'smooth_Streaming': - formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss')) + formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss', fatal=False)) elif source_type == 'apple_HTTP_Live_Streaming': - formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4')) + formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4', fatal=False)) elif source_type == 'mPEG_DASH': - formats.extend(self._extract_mpd_formats(source['url'], video_id)) + formats.extend(self._extract_mpd_formats(source['url'], video_id, fatal=False)) else: formats.append({ 'format_id': source_type, From d8fb3490863653182864d2a53522f350d67a9ff8 Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 2 Dec 2024 16:29:30 +0100 Subject: [PATCH 7/7] 
[cleanup] Bump ruff to 0.8.x (#11608) Authored by: seproDev --- pyproject.toml | 6 ++---- yt_dlp/YoutubeDL.py | 10 +++++----- yt_dlp/__init__.py | 6 +++--- yt_dlp/aes.py | 14 ++++++-------- yt_dlp/cookies.py | 4 ++-- yt_dlp/downloader/hls.py | 8 ++++---- yt_dlp/downloader/youtube_live_chat.py | 4 ++-- yt_dlp/extractor/bilibili.py | 4 ++-- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/funimation.py | 6 +++--- yt_dlp/extractor/youtube.py | 14 +++++++------- yt_dlp/plugins.py | 2 +- yt_dlp/postprocessor/__init__.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 2 +- yt_dlp/utils/_utils.py | 4 ++-- 15 files changed, 42 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 97ea4375fcee..96e2d669a435 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ dev = [ ] static-analysis = [ "autopep8~=2.0", - "ruff~=0.7.0", + "ruff~=0.8.0", ] test = [ "pytest~=8.1", @@ -186,6 +186,7 @@ ignore = [ "E501", # line-too-long "E731", # lambda-assignment "E741", # ambiguous-variable-name + "UP031", # printf-string-formatting "UP036", # outdated-version-block "B006", # mutable-argument-default "B008", # function-call-in-default-argument @@ -258,9 +259,6 @@ select = [ "A002", # builtin-argument-shadowing "C408", # unnecessary-collection-call ] -"yt_dlp/jsinterp.py" = [ - "UP031", # printf-string-formatting -] [tool.ruff.lint.isort] known-first-party = [ diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a9a8e4133eb3..65b72e026cb3 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1116,7 +1116,7 @@ def report_file_delete(self, file_name): def raise_no_formats(self, info, forced=False, *, msg=None): has_drm = info.get('_has_drm') ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) - msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' + msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!' if forced or not ignored: raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], expected=has_drm or ignored or expected) @@ -2196,7 +2196,7 @@ def _select_formats(self, formats, selector): def _default_format_spec(self, info_dict): prefer_best = ( self.params['outtmpl']['default'] == '-' - or info_dict.get('is_live') and not self.params.get('live_from_start')) + or (info_dict.get('is_live') and not self.params.get('live_from_start'))) def can_merge(): merger = FFmpegMergerPP(self) @@ -2365,7 +2365,7 @@ def _merge(formats_pair): vexts=[f['ext'] for f in video_fmts], aexts=[f['ext'] for f in audio_fmts], preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) - or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) + or (self.params.get('prefer_free_formats') and ('webm', 'mkv')))) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) @@ -3541,8 +3541,8 @@ def ffmpeg_fixup(cndn, msg, cls): and info_dict.get('container') == 'm4a_dash', 'writing DASH m4a. 
Only some players support this container', FFmpegFixupM4aPP) - ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') - or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, + ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')) + or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None), 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(downloader == 'dashsegments' diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index a1880bf7dc29..20111175b1c3 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1062,7 +1062,7 @@ def make_row(target, handler): # If we only have a single process attached, then the executable was double clicked # When using `pyinstaller` with `--onefile`, two processes get attached is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI') - if attached_processes == 1 or is_onefile and attached_processes == 2: + if attached_processes == 1 or (is_onefile and attached_processes == 2): print(parser._generate_error_message( 'Do not double-click the executable, instead call it from a command line.\n' 'Please read the README for further information on how to use yt-dlp: ' @@ -1109,9 +1109,9 @@ def main(argv=None): from .extractor import gen_extractors, list_extractors __all__ = [ - 'main', 'YoutubeDL', - 'parse_options', 'gen_extractors', 'list_extractors', + 'main', + 'parse_options', ] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 0930d36df994..9908434a5894 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -534,19 +534,17 @@ def ghash(subkey, data): __all__ = [ 'aes_cbc_decrypt', 'aes_cbc_decrypt_bytes', - 'aes_ctr_decrypt', - 'aes_decrypt_text', - 'aes_decrypt', - 'aes_ecb_decrypt', - 'aes_gcm_decrypt_and_verify', - 'aes_gcm_decrypt_and_verify_bytes', - 'aes_cbc_encrypt', 'aes_cbc_encrypt_bytes', + 'aes_ctr_decrypt', 'aes_ctr_encrypt', + 'aes_decrypt', + 'aes_decrypt_text', + 'aes_ecb_decrypt', 'aes_ecb_encrypt', 'aes_encrypt', - + 'aes_gcm_decrypt_and_verify', + 'aes_gcm_decrypt_and_verify_bytes', 'key_expansion', 'pad_block', 'pkcs7_padding', diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index d5b0d3991b44..772433b0f2d0 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1276,8 +1276,8 @@ def open(self, file, *, write=False): def _really_save(self, f, ignore_discard, ignore_expires): now = time.time() for cookie in self: - if (not ignore_discard and cookie.discard - or not ignore_expires and cookie.is_expired(now)): + if ((not ignore_discard and cookie.discard) + or (not ignore_expires and cookie.is_expired(now))): continue name, value = cookie.name, cookie.value if value is None: diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 0a00d5dabbcd..da2574da7237 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -119,12 +119,12 @@ def real_download(self, filename, info_dict): self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') def is_ad_fragment_start(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s - or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s) + or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))) def is_ad_fragment_end(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s - or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + return 
((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s) + or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))) fragments = [] diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 961938d4491f..ddd912ca2b66 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -123,8 +123,8 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} - func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live - or frag_index == 1 and try_refresh_replay_beginning + func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live) + or (frag_index == 1 and try_refresh_replay_beginning) or parse_actions_replay) return (True, *func(live_chat_continuation)) except HTTPError as err: diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index f01befcc0b6f..72d5f20cf36b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -662,12 +662,12 @@ def _real_extract(self, url): video_id, title = video_data['bvid'], video_data.get('title') # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. - page_list_json = not is_festival and traverse_obj( + page_list_json = (not is_festival and traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/pagelist', video_id, fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, note='Extracting videos in anthology', headers=headers), - 'data', expected_type=list) or [] + 'data', expected_type=list)) or [] is_anthology = len(page_list_json) > 1 part_id = int_or_none(parse_qs(url).get('p', [None])[-1]) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ce79e0b62294..92ddad2b76ab 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3803,7 +3803,7 @@ def _cookies_passed(self): def mark_watched(self, *args, **kwargs): if not self.get_param('mark_watched', False): return - if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed: + if (self.supports_login() and self._get_login_info()[0] is not None) or self._cookies_passed: self._mark_watched(*args, **kwargs) def _mark_watched(self, *args, **kwargs): diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index d3e61c84f8d2..01b53bcde6c1 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -193,9 +193,9 @@ def _real_extract(self, url): for lang, version, fmt in self._get_experiences(episode): experience_id = str(fmt['experienceId']) - if (only_initial_experience and experience_id != initial_experience_id - or requested_languages and lang.lower() not in requested_languages - or requested_versions and version.lower() not in requested_versions): + if ((only_initial_experience and experience_id != initial_experience_id) + or (requested_languages and lang.lower() not in requested_languages) + or (requested_versions and version.lower() not in requested_versions)): continue thumbnails.append({'url': fmt.get('poster')}) duration = max(duration, fmt.get('duration', 0)) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a67f09e623cf..41cd90db950e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2925,7 +2925,7 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate): # Obtain from MPD's maximum seq value 
old_mpd_url = mpd_url last_error = ctx.pop('last_error', None) - expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403 + expire_fast = immediate or (last_error and isinstance(last_error, HTTPError) and last_error.status == 403) mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000) or (mpd_url, stream_number, False)) if not refresh_sequence: @@ -3995,8 +3995,8 @@ def append_client(*client_names): return prs, player_url def _needs_live_processing(self, live_status, duration): - if (live_status == 'is_live' and self.get_param('live_from_start') - or live_status == 'post_live' and (duration or 0) > 2 * 3600): + if ((live_status == 'is_live' and self.get_param('live_from_start')) + or (live_status == 'post_live' and (duration or 0) > 2 * 3600)): return live_status def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): @@ -4192,7 +4192,7 @@ def build_fragments(f): skip_manifests = set(self._configuration_arg('skip')) if (not self.get_param('youtube_include_hls_manifest', True) or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway - or needs_live_processing and skip_bad_formats): + or (needs_live_processing and skip_bad_formats)): skip_manifests.add('hls') if not self.get_param('youtube_include_dash_manifest', True): @@ -4390,14 +4390,14 @@ def _real_extract(self, url): expected_type=dict) translated_title = self._get_text(microformats, (..., 'title')) - video_title = (self._preferred_lang and translated_title + video_title = ((self._preferred_lang and translated_title) or get_first(video_details, 'title') # primary or translated_title or search_meta(['og:title', 'twitter:title', 'title'])) translated_description = self._get_text(microformats, (..., 'description')) original_description = get_first(video_details, 'shortDescription') video_description = ( - self._preferred_lang and translated_description + (self._preferred_lang and translated_description) # If original description is blank, it will be an empty string. # Do not prefer translated description in this case. 
or original_description if original_description is not None else translated_description) @@ -6837,7 +6837,7 @@ def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'): tab_url = urljoin(base_url, traverse_obj( tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))) - tab_id = (tab_url and self._get_url_mobj(tab_url)['tab'][1:] + tab_id = ((tab_url and self._get_url_mobj(tab_url)['tab'][1:]) or traverse_obj(tab, 'tabIdentifier', expected_type=str)) if tab_id: return { diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py index 2bf55df71e08..94335a9a32ee 100644 --- a/yt_dlp/plugins.py +++ b/yt_dlp/plugins.py @@ -183,4 +183,4 @@ def load_plugins(name, suffix): sys.meta_path.insert(0, PluginFinder(f'{PACKAGE_NAME}.extractor', f'{PACKAGE_NAME}.postprocessor')) -__all__ = ['directories', 'load_plugins', 'PACKAGE_NAME', 'COMPAT_PACKAGE_NAME'] +__all__ = ['COMPAT_PACKAGE_NAME', 'PACKAGE_NAME', 'directories', 'load_plugins'] diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index 164540b5dbb7..7b1620544996 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -44,4 +44,4 @@ def get_postprocessor(key): globals().update(_PLUGIN_CLASSES) __all__ = [name for name in globals() if name.endswith('PP')] -__all__.extend(('PostProcessor', 'FFmpegPostProcessor')) +__all__.extend(('FFmpegPostProcessor', 'PostProcessor')) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index d994754fd3bf..8965806ae7b1 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -626,7 +626,7 @@ def run(self, info): sub_ext = sub_info['ext'] if sub_ext == 'json': self.report_warning('JSON subtitles cannot be embedded') - elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': + elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'): sub_langs.append(lang) sub_names.append(sub_info.get('name')) sub_filenames.append(sub_info['filepath']) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 8517b762ef85..699bf1e7f65b 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2683,8 +2683,8 @@ def merge_dicts(*dicts): merged = {} for a_dict in dicts: for k, v in a_dict.items(): - if (v is not None and k not in merged - or isinstance(v, str) and merged[k] == ''): + if ((v is not None and k not in merged) + or (isinstance(v, str) and merged[k] == '')): merged[k] = v return merged
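
For reference, a minimal behavioral sketch of the `merge_dicts()` helper touched in the final hunk above. The added parentheses only make the existing precedence explicit (`and` binds tighter than `or`), so the helper still keeps the first non-None value for a key and only lets a later value replace an earlier empty string. This sketch assumes a local yt_dlp install and is an illustration, not part of the patch series:

    # Earlier dicts win, except that an empty string may be overwritten by a later value.
    from yt_dlp.utils import merge_dicts

    print(merge_dicts({'title': ''}, {'title': 'Fallback', 'id': '42'}))
    # -> {'title': 'Fallback', 'id': '42'}  (empty string replaced, missing key filled)

    print(merge_dicts({'title': 'Primary'}, {'title': 'Fallback'}))
    # -> {'title': 'Primary'}  (the first non-None value is kept)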