import calendar
import collections
import copy
-import datetime
+import datetime as dt
import enum
import hashlib
import itertools
import os.path
import random
import re
+import shlex
import sys
import threading
import time
import traceback
-import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper
from ..compat import functools
from ..jsinterp import JSInterpreter
+from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import (
NO_DEFAULT,
ExtractorError,
clean_html,
datetime_from_str,
dict_get,
+ filesize_from_tbr,
filter_dict,
float_or_none,
format_field,
join_nonempty,
js_to_json,
mimetype2ext,
- network_exceptions,
orderedSet,
parse_codecs,
parse_count,
str_to_int,
strftime_or_none,
traverse_obj,
+ try_call,
try_get,
unescapeHTML,
unified_strdate,
'client': {
'clientName': 'WEB',
'clientVersion': '2.20220801.00.00',
- }
+ },
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
},
'web_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'clientVersion': '1.20220731.00.00',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
},
'web_music': {
'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
'client': {
'clientName': 'WEB_REMIX',
'clientVersion': '1.20220727.01.00',
- }
+ },
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
},
'client': {
'clientName': 'WEB_CREATOR',
'clientVersion': '1.20220726.00.00',
- }
+ },
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
},
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
- 'clientVersion': '17.31.35',
+ 'clientVersion': '19.09.37',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
- }
+ 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
+ },
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
'android_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER',
- 'clientVersion': '17.31.35',
+ 'clientVersion': '19.09.37',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+ 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
'android_music': {
'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_MUSIC',
- 'clientVersion': '5.16.51',
+ 'clientVersion': '6.42.52',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
- }
+ 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
+ },
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
'android_creator': {
'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
'clientName': 'ANDROID_CREATOR',
'clientVersion': '22.30.100',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
+ 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
# iOS clients have HLS live streams. Setting device model to get 60fps formats.
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
- 'clientVersion': '17.33.2',
+ 'clientVersion': '19.09.3',
'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
- }
+ 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
+ },
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
'ios_embedded': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MESSAGES_EXTENSION',
- 'clientVersion': '17.33.2',
+ 'clientVersion': '19.09.3',
'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
'ios_music': {
'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MUSIC',
- 'clientVersion': '5.21',
+ 'clientVersion': '6.33.3',
'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
'ios_creator': {
'INNERTUBE_CONTEXT': {
'clientName': 'IOS_CREATOR',
'clientVersion': '22.33.101',
'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
- 'REQUIRE_JS_PLAYER': False
+ 'REQUIRE_JS_PLAYER': False,
},
# mweb has 'ultralow' formats
# See: https://github.com/yt-dlp/yt-dlp/pull/557
'client': {
'clientName': 'MWEB',
'clientVersion': '2.20220801.00.00',
- }
+ },
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
},
# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
# See: https://github.com/zerodytrash/YouTube-Internal-Clients
'clientVersion': '2.0',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
+ },
+ # This client has pre-merged video+audio 720p/1080p streams
+ 'mediaconnect': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MEDIA_CONNECT_FRONTEND',
+ 'clientVersion': '0.1',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
},
}
THIRD_PARTY = {
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
- BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
+ BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
AVAILABILITY_PREMIUM = enum.auto()
AVAILABILITY_SUBSCRIPTION = enum.auto()
LIVE_NOW = enum.auto()
+ VERIFIED = enum.auto()
class YoutubeBaseInfoExtractor(InfoExtractor):
r'(?:www\.)?piped\.adminforge\.de',
r'(?:www\.)?watch\.whatevertinfoil\.de',
r'(?:www\.)?piped\.qdi\.fi',
- r'(?:www\.)?piped\.video',
+ r'(?:(?:www|cf)\.)?piped\.video',
r'(?:www\.)?piped\.aeong\.one',
r'(?:www\.)?piped\.moomoo\.me',
r'(?:www\.)?piped\.chauvet\.pro',
'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
- 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
+ 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
]
_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
return
- consent_id = None
- consent = cookies.get('CONSENT')
- if consent:
- if 'YES' in consent.value:
- return
- consent_id = self._search_regex(
- r'PENDING\+(\d+)', consent.value, 'consent', default=None)
- if not consent_id:
- consent_id = random.randint(100, 999)
- self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+ socs = cookies.get('SOCS')
+ if socs and not socs.value.startswith('CAA'): # not consented
+ return
+ self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
def _initialize_pref(self):
cookies = self._get_cookies('https://www.youtube.com/')
'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
- 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
+ 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
}
if session_index is None:
session_index = self._extract_session_index(ytcfg)
url = {
'web': 'https://www.youtube.com',
'web_music': 'https://music.youtube.com',
- 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+ 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
}.get(client)
if not url:
return {}
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
query = {
- 'continuation': continuation
+ 'continuation': continuation,
}
# TODO: Inconsistency with clickTrackingParams.
# Currently we have a fixed ctp contained within context (from ytcfg)
return traverse_obj(renderer, (
('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
- ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
+ ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
), get_all=False, expected_type=cls._extract_continuation_ep_data)
@classmethod
for alert_type, alert_message in (warnings + errors[:-1]):
self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
if errors:
- raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
+ raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected)
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """
    Extract the alerts contained in `data` and report them in a single step.

    `data` is handed to `_extract_alerts`; the alerts it yields are then forwarded,
    together with any extra positional and keyword arguments, to `_report_alerts`.
    @returns whatever `_report_alerts` returns
    """
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
- def _extract_badges(self, renderer: dict):
- privacy_icon_map = {
+ def _extract_badges(self, badge_list: list):
+ """
+ Extract known BadgeTypes from a list of badge renderers.
+ @returns [{'type': BadgeType}]
+ """
+ icon_type_map = {
'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
- 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
+ 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
+ 'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
+ 'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
+ 'CHECK': BadgeType.VERIFIED,
}
badge_style_map = {
'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
- 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
+ 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
+ 'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
+ 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
}
label_map = {
'private': BadgeType.AVAILABILITY_PRIVATE,
'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
'live': BadgeType.LIVE_NOW,
- 'premium': BadgeType.AVAILABILITY_PREMIUM
+ 'premium': BadgeType.AVAILABILITY_PREMIUM,
+ 'verified': BadgeType.VERIFIED,
+ 'official artist channel': BadgeType.VERIFIED,
}
badges = []
- for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
+ for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
badge_type = (
- privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+ icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
or badge_style_map.get(traverse_obj(badge, 'style'))
)
if badge_type:
continue
# fallback, won't work in some languages
- label = traverse_obj(badge, 'label', expected_type=str, default='')
+ label = traverse_obj(
+ badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
for match, label_badge_type in label_map.items():
if match in label.lower():
- badges.append({'type': badge_type})
- continue
+ badges.append({'type': label_badge_type})
+ break
return badges
return count
@staticmethod
- def _extract_thumbnails(data, *path_list):
+ def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
"""
Extract thumbnails from thumbnails dict
@param path_list: path list to level that contains 'thumbnails' key
"""
thumbnails = []
for path in path_list or [()]:
- for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
+ for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)):
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
"""
- # XXX: this could be moved to a general function in utils.py
+ # XXX: this could be moved to a general function in utils/_utils.py
# The relative time text strings are roughly the same as what
# Javascript's Intl.RelativeTimeFormat function generates.
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
if start:
return datetime_from_str(start)
try:
- return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
+ return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit')))
except ValueError:
return None
def _parse_time_text(self, text):
if not text:
return
- dt = self.extract_relative_time(text)
+ dt_ = self.extract_relative_time(text)
timestamp = None
- if isinstance(dt, datetime.datetime):
- timestamp = calendar.timegm(dt.timetuple())
+ if isinstance(dt_, dt.datetime):
+ timestamp = calendar.timegm(dt_.timetuple())
if timestamp is None:
timestamp = (
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
- for retry in self.RetryManager():
+ raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+ # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
+ icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+ icd_rm = next(icd_retries)
+ main_retries = iter(self.RetryManager())
+ main_rm = next(main_retries)
+ # Manual retry loop for multiple RetryManagers
+ # The proper RetryManager MUST be advanced after an error
+ # and its result MUST be checked if the manager is non-fatal
+ while True:
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
except ExtractorError as e:
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
- elif not isinstance(e.cause, urllib.error.HTTPError):
- retry.error = e
+ elif not isinstance(e.cause, HTTPError):
+ main_rm.error = e
+ next(main_retries)
continue
- first_bytes = e.cause.read(512)
+ first_bytes = e.cause.response.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
- self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
+ self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
- # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+ # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
- if e.cause.code not in (403, 429):
- retry.error = e
+ if e.cause.status not in (403, 429):
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
- # YouTube servers may return errors we want to retry on in a 200 OK response
+ # YouTube's servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
- retry.error = e
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
- retry.error = ExtractorError('Incomplete data received', expected=True)
+ icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+ should_retry = next(icd_retries, None)
+ if not should_retry:
+ return None
continue
return response
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
get_all=False, expected_type=str)
- badges = self._extract_badges(renderer)
-
+ badges = self._extract_badges(traverse_obj(renderer, 'badges'))
+ owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
expected_type=str)) or ''
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
view_count_field: view_count,
- 'live_status': live_status
+ 'live_status': live_status,
+ 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
}
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube'
- _VALID_URL = r"""(?x)^
+ _VALID_URL = r'''(?x)^
(
(?:https?://|//) # http(s):// or protocol-independent URL
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
(?:www\.)?hooktube\.com|
(?:www\.)?yourepeat\.com|
tube\.majestyc\.net|
- %(invidious)s|
+ {invidious}|
youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
youtu\.be| # just youtu.be/xxxx
vid\.plus| # or vid.plus/xxxx
zwearz\.com/watch| # or zwearz.com/watch/xxxx
- %(invidious)s
+ {invidious}
)/
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
)? # all until now is optional -> you can pass the naked ID
- (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
+ (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
- (?:\#|$)""" % {
- 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
- }
+ (?:\#|$)'''.format(
+ invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
+ )
_EMBED_REGEX = [
r'''(?x)
(?:
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
- _formats = {
+ _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
- }
+ 'timestamp': 1349198244,
+ },
},
{
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
'uploader': 'Philipp Hagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
+ 'heatmap': 'count:100',
+ 'timestamp': 1349198244,
},
'params': {
'skip_download': True,
'ext': 'm4a',
'upload_date': '20121002',
'description': '',
- 'title': 'UHDTV TEST 8K VIDEO.mp4'
+ 'title': 'UHDTV TEST 8K VIDEO.mp4',
},
'params': {
'youtube_include_dash_manifest': True,
'uploader': 'The Witcher',
'uploader_url': 'https://www.youtube.com/@thewitcher',
'uploader_id': '@thewitcher',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ 'timestamp': 1401991663,
},
},
{
'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
'uploader_id': '@FlyingKitty900',
'comment_count': int,
+ 'channel_is_verified': True,
},
},
{
'uploader': 'Projekt Melody',
'uploader_url': 'https://www.youtube.com/@ProjektMelody',
'uploader_id': '@ProjektMelody',
+ 'timestamp': 1577508724,
},
},
{
},
'expected_warnings': [
'DASH manifest missing',
- ]
+ ],
},
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
{
'uploader': 'Olympics',
'uploader_url': 'https://www.youtube.com/@Olympics',
'uploader_id': '@Olympics',
+ 'channel_is_verified': True,
+ 'timestamp': 1440707674,
},
'params': {
'skip_download': 'requires avconv',
- }
+ },
},
# Non-square pixels
{
'uploader': '孫ᄋᄅ',
'uploader_url': 'https://www.youtube.com/@AllenMeow',
'uploader_id': '@AllenMeow',
+ 'timestamp': 1299776999,
},
},
# url_encoded_fmt_stream_map is empty string
},
}],
'params': {'skip_download': True},
+ 'skip': 'Not multifeed anymore',
},
{
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
'playable_in_embed': True,
'like_count': int,
'age_limit': 0,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'params': {
'skip_download': True,
'uploader': 'The Berkman Klein Center for Internet & Society',
'uploader_id': '@BKCHarvard',
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
+ 'timestamp': 1422422076,
},
'params': {
'skip_download': True,
'uploader': 'Bernie Sanders',
'uploader_url': 'https://www.youtube.com/@BernieSanders',
'uploader_id': '@BernieSanders',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ 'timestamp': 1447987198,
},
'params': {
'skip_download': True,
'uploader': 'Vsauce',
'uploader_url': 'https://www.youtube.com/@Vsauce',
'uploader_id': '@Vsauce',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'timestamp': 1484761047,
},
'params': {
'skip_download': True,
'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312',
- 'artist': 'Stephen',
+ 'artists': ['Stephen'],
+ 'creators': ['Stephen'],
'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear',
'release_date': '20190313',
- 'release_year': 2019,
'alt_title': 'Voyeur Girl',
'view_count': int,
'playable_in_embed': True,
'channel': 'Stephen', # TODO: should be "Stephen - Topic"
'uploader': 'Stephen',
'availability': 'public',
- 'creator': 'Stephen',
'duration': 169,
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'age_limit': 0,
'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
'tags': 'count:11',
'live_status': 'not_live',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'params': {
'skip_download': True,
'uploader': 'l\'Or Vert asbl',
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
'uploader_id': '@ElevageOrVert',
+ 'timestamp': 1497343210,
},
'params': {
'skip_download': True,
'uploader': 'kudvenkat',
'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
'uploader_id': '@Csharp-video-tutorialsBlogspot',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ 'timestamp': 1377976349,
},
'params': {
'skip_download': True,
'uploader': 'CBS Mornings',
'uploader_url': 'https://www.youtube.com/@CBSMornings',
'uploader_id': '@CBSMornings',
- }
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'timestamp': 1405513526,
+ },
},
{
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
'view_count': int,
'channel': 'Walk around Japan',
'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
- 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
+ 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
'age_limit': 0,
'availability': 'public',
'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
'uploader': 'Walk around Japan',
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
'uploader_id': '@walkaroundjapan7124',
+ 'timestamp': 1605884416,
},
'params': {
'skip_download': True,
}, {
# Has multiple audio streams
'url': 'WaOKSUlf4TM',
- 'only_matching': True
+ 'only_matching': True,
}, {
# Requires Premium: has format 141 when requested using YTM url
'url': 'https://music.youtube.com/watch?v=XclachpHxis',
- 'only_matching': True
+ 'only_matching': True,
}, {
# multiple subtitles with same lang_code
'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
'uploader': 'colinfurze',
'uploader_url': 'https://www.youtube.com/@colinfurze',
'uploader_id': '@colinfurze',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {
'format': '17', # 3gp format available on android
'extractor_args': {'youtube': {'player_client': ['android']}},
},
+ 'skip': 'android client broken',
},
{
# Skip download of additional client configs (remix client config in this case)
'uploader': 'SciShow',
'uploader_url': 'https://www.youtube.com/@SciShow',
'uploader_id': '@SciShow',
- }, 'params': {'format': 'mhtml', 'skip_download': True}
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ 'timestamp': 1395685455,
+ }, 'params': {'format': 'mhtml', 'skip_download': True},
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
- }
- }, {
- # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
- 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
- 'info_dict': {
- 'id': '2NUZ8W2llS4',
- 'ext': 'mp4',
- 'title': 'The NP that test your phone performance 🙂',
- 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
- 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
- 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
- 'duration': 21,
- 'view_count': int,
- 'age_limit': 0,
- 'categories': ['Gaming'],
- 'tags': 'count:23',
- 'playable_in_embed': True,
- 'live_status': 'not_live',
- 'upload_date': '20220102',
- 'like_count': int,
- 'availability': 'public',
- 'channel': 'Leon Nguyen',
- 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
- 'comment_count': int,
- 'channel_follower_count': int,
- 'uploader': 'Leon Nguyen',
- 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
- 'uploader_id': '@LeonNguyen',
+ 'heatmap': 'count:100',
+ 'timestamp': 1641170939,
},
- 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
}, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)
'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
'uploader': 'Quackity',
'uploader_id': '@Quackity',
'uploader_url': 'https://www.youtube.com/@Quackity',
- }
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ 'timestamp': 1641172509,
+ },
},
- { # continuous livestream. Microformat upload date should be preferred.
- # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
- 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
+ { # continuous livestream.
+ # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
+ 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
'info_dict': {
- 'id': 'kgx4WGK0oNU',
- 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'id': 'jfKfPfyJRdk',
'ext': 'mp4',
- 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
- 'availability': 'public',
- 'age_limit': 0,
- 'release_timestamp': 1637975704,
- 'upload_date': '20210619',
- 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
- 'live_status': 'is_live',
- 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
- 'channel': 'Abao in Tokyo',
- 'channel_follower_count': int,
- 'release_date': '20211127',
- 'tags': 'count:39',
- 'categories': ['People & Blogs'],
+ 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
'like_count': int,
- 'view_count': int,
- 'playable_in_embed': True,
- 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+ 'uploader': 'Lofi Girl',
+ 'categories': ['Music'],
'concurrent_view_count': int,
- 'uploader': 'Abao in Tokyo',
- 'uploader_url': 'https://www.youtube.com/@abaointokyo',
- 'uploader_id': '@abaointokyo',
- },
- 'params': {'skip_download': True}
- }, {
- # Story. Requires specific player params to work.
- 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
- 'info_dict': {
- 'id': 'vv8qTUWmulI',
- 'ext': 'mp4',
- 'availability': 'unlisted',
- 'view_count': int,
- 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
- 'upload_date': '20220526',
- 'categories': ['Education'],
- 'title': 'Story',
- 'channel': 'IT\'S HISTORY',
- 'description': '',
- 'duration': 12,
'playable_in_embed': True,
+ 'timestamp': 1657627949,
+ 'release_date': '20220712',
+ 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
+ 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
'age_limit': 0,
- 'live_status': 'not_live',
- 'tags': [],
- 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
- 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
+ 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
+ 'release_timestamp': 1657641570,
+ 'uploader_url': 'https://www.youtube.com/@LofiGirl',
+ 'channel_follower_count': int,
+ 'channel_is_verified': True,
+ 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
+ 'view_count': int,
+ 'live_status': 'is_live',
+ 'tags': 'count:32',
+ 'channel': 'Lofi Girl',
+ 'availability': 'public',
+ 'upload_date': '20220712',
+ 'uploader_id': '@LofiGirl',
},
- 'skip': 'stories get removed after some period of time',
+ 'params': {'skip_download': True},
}, {
'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
'info_dict': {
'uploader_id': '@lesmiscore',
'uploader': 'Lesmiscore',
'uploader_url': 'https://www.youtube.com/@lesmiscore',
- }
+ 'timestamp': 1648005313,
+ },
}, {
# Prefer primary title+description language metadata by default
# Do not prefer translated description if primary is empty
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
+ 'timestamp': 1662677394,
},
- 'params': {'skip_download': True}
+ 'params': {'skip_download': True},
}, {
# Extractor argument: prefer translated title+description
'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
'duration': 5,
'live_status': 'not_live',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
- 'upload_date': '20220728',
+ 'upload_date': '20220729',
'view_count': int,
'categories': ['People & Blogs'],
'thumbnail': r're:^https?://.*\.jpg',
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
+ 'timestamp': 1659073275,
+ 'like_count': int,
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],
'uploader': 'MrBeast',
'uploader_url': 'https://www.youtube.com/@MrBeast',
'uploader_id': '@MrBeast',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
'uploader': 'Projekt Melody',
'uploader_id': '@ProjektMelody',
'uploader_url': 'https://www.youtube.com/@ProjektMelody',
+ 'timestamp': 1577508724,
},
'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
},
'uploader': 'さなちゃんねる',
'uploader_url': 'https://www.youtube.com/@sana_natori',
'uploader_id': '@sana_natori',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
+ 'timestamp': 1671798112,
},
},
{
'thumbnail': r're:^https?://.*\.webp',
'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
'playable_in_embed': True,
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {
- 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
+ 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
},
},
]
'uploader': 'Christopher Sykes',
'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
'uploader_id': '@ChristopherSykesDocumentaries',
+ 'heatmap': 'count:100',
+ 'timestamp': 1211825920,
},
'params': {
'skip_download': True,
- }
+ },
},
]
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
if not should_continue:
known_idx = idx - 1
raise ExtractorError('breaking out of outer loop')
- last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
+ last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
yield {
'url': last_segment_url,
'fragment_count': last_seq,
if id_m:
break
else:
- raise ExtractorError('Cannot identify player %r' % player_url)
+ raise ExtractorError(f'Cannot identify player {player_url!r}')
return id_m.group('id')
def _load_player(self, video_id, player_url, fatal=True):
code = self._download_webpage(
player_url, video_id, fatal=fatal,
note='Downloading player ' + player_id,
- errnote='Download of %s failed' % player_url)
+ errnote=f'Download of {player_url} failed')
if code:
self._code_cache[player_id] = code
return self._code_cache.get(player_id)
cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = ' + '.join(gen_sig_code(cache_spec))
- signature_id_tuple = '(%s)' % (
- ', '.join(str(len(p)) for p in example_sig.split('.')))
- code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
- ' return %s\n') % (signature_id_tuple, expr_code)
+ signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
+ code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
+ f' return {expr_code}\n')
self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
return funcname
return json.loads(js_to_json(self._search_regex(
- rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
# For redundancy
func_code = self._search_regex(
- r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
+ rf'''(?xs){func_name}\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
# NB: The end of the regex is intentionally kept strict
- {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
+ {{(?P<code>.+?}}\s*return\ [\w$]+.join\(""\))}};''',
jscode, 'nsig function', group=('var', 'code'), default=None)
if func_code:
func_code = ([func_code[0]], func_code[1])
# cpn generation algorithm is reverse engineered from base.js.
# In fact it works even with dummy cpn.
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
- cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+ cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
# # more consistent results setting it to right before the end
video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
webpage)
if mobj:
yield cls.url_result(mobj.group('url'), cls)
- raise cls.StopExtraction()
+ raise cls.StopExtraction
yield from super()._extract_from_webpage(url, webpage)
chapter_list = traverse_obj(
data, (
'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
- 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
+ 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
), expected_type=list)
return self._extract_chapters_helper(
chapter_time, chapter_title, duration)
for contents in content_list)), [])
- def _extract_heatmap_from_player_overlay(self, data):
- content_list = traverse_obj(data, (
- 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
- 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
- return next(filter(None, (
- traverse_obj(contents, (..., 'heatMarkerRenderer', {
- 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
- 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
- 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
- })) for contents in content_list)), None)
+ def _extract_heatmap(self, data):
+ """Collect heatmap markers from the entity mutations in `data`.
+
+ Walks `data['frameworkUpdates']['entityBatchUpdate']['mutations']`, keeps the
+ mutations whose marker list has `markerType == 'MARKER_TYPE_HEATMAP'`, and maps
+ each of their markers to a dict with `start_time`, `end_time` and `value`.
+ Returns None instead of an empty result (see the trailing `or None`).
+ """
+ return traverse_obj(data, (
+ 'frameworkUpdates', 'entityBatchUpdate', 'mutations',
+ # Only mutations carrying a heatmap marker list are of interest
+ lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
+ 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
+ # float_or_none is called with scale=1000 - presumably converting ms to seconds; confirm against the helper
+ 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
+ # The int() casts suggest the millisecond fields may arrive as strings; end = (start + duration) / 1000
+ 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
+ 'value': ('intensityScoreNormalized', {float_or_none}),
+ })) or None
+
+ def _extract_comment(self, entities, parent=None):
+ """Build a comment info dict from the entity payloads of a single comment.
+
+ `entities` is searched for a `payload.commentEntityPayload` dict (comment data)
+ and a `payload.engagementToolbarStateEntityPayload` dict (toolbar/heart state).
+ `parent` is stored under the 'parent' key, falling back to 'root' when falsy.
+
+ Returns None when no string `properties.commentId` is found, otherwise a dict
+ with id, parent, text, like/author fields, is_favorited, _time_text and timestamp.
+ """
+ comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
+ # Without a comment id there is nothing to report
+ if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
+ return
+
+ toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
+ # Fall back to '' so that _parse_time_text always receives a string
+ time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
+
+ return {
+ 'id': comment_id,
+ 'parent': parent or 'root',
+ **traverse_obj(comment_entity_payload, {
+ 'text': ('properties', 'content', 'content', {str}),
+ 'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
+ 'author_id': ('author', 'channelId', {self.ucid_or_none}),
+ 'author': ('author', 'displayName', {str}),
+ 'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
+ 'author_is_uploader': ('author', 'isCreator', {bool}),
+ 'author_is_verified': ('author', 'isVerified', {bool}),
+ # Two alternative locations for the channel URL are tried; the value is joined onto the YouTube origin
+ 'author_url': ('author', 'channelCommand', 'innertubeCommand', (
+ ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
+ ), {lambda x: urljoin('https://www.youtube.com', x)}),
+ }, get_all=False),
+ # None (unknown) when no toolbar payload was found, otherwise a bool
+ 'is_favorited': (None if toolbar_entity_payload is None else
+ toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
+ '_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate.
+ 'timestamp': self._parse_time_text(time_text),
+ }
- def _extract_comment(self, comment_renderer, parent=None):
+ def _extract_comment_old(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
if not comment_id:
return
info['author_is_uploader'] = author_is_uploader
comment_abr = traverse_obj(
- comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict)
+ comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
if comment_abr is not None:
info['is_favorited'] = 'creatorHeart' in comment_abr
- comment_ab_icontype = traverse_obj(
- comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType'))
- if comment_ab_icontype is not None:
- info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE')
+ badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
+ if self._has_badge(badges, BadgeType.VERIFIED):
+ info['author_is_verified'] = True
is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
if is_pinned:
sort_text = str_or_none(sort_menu_item.get('title'))
if not sort_text:
sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
- self.to_screen('Sorting comments by %s' % sort_text.lower())
+ self.to_screen(f'Sorting comments by {sort_text.lower()}')
break
return _continuation
- def extract_thread(contents):
+ def extract_thread(contents, entity_payloads):
if not parent:
tracker['current_page_thread'] = 0
for content in contents:
if not parent and tracker['total_parent_comments'] >= max_parents:
yield
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
- comment_renderer = get_first(
- (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
- expected_type=dict, default={})
- comment = self._extract_comment(comment_renderer, parent)
+ # old comment format
+ if not entity_payloads:
+ comment_renderer = get_first(
+ (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
+ expected_type=dict, default={})
+
+ comment = self._extract_comment_old(comment_renderer, parent)
+
+ # new comment format
+ else:
+ view_model = (
+ traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
+ or traverse_obj(content, ('commentViewModel', {dict})))
+ comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
+ if not comment_keys:
+ continue
+ entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
+ comment = self._extract_comment(entities, parent)
+ if comment:
+ comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
+
if not comment:
continue
comment_id = comment['id']
+
if comment.get('is_pinned'):
tracker['pinned_comment_ids'].add(comment_id)
# Sometimes YouTube may break and give us infinite looping comments.
# Pinned comments may appear a second time in newest first sort
# See: https://github.com/yt-dlp/yt-dlp/issues/6712
continue
- self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
+ self.report_warning(
+ 'Detected YouTube comments looping. Stopping comment extraction '
+ f'{"for this thread" if parent else ""} as we probably cannot get any more.')
yield
else:
tracker['seen_comment_ids'].add(comment['id'])
# Keeps track of counts across recursive calls
if not tracker:
- tracker = dict(
- running_total=0,
- est_total=None,
- current_page_thread=0,
- total_parent_comments=0,
- total_reply_comments=0,
- seen_comment_ids=set(),
- pinned_comment_ids=set()
- )
+ tracker = {
+ 'running_total': 0,
+ 'est_total': None,
+ 'current_page_thread': 0,
+ 'total_parent_comments': 0,
+ 'total_reply_comments': 0,
+ 'seen_comment_ids': set(),
+ 'pinned_comment_ids': set(),
+ }
# TODO: Deprecated
# YouTube comments have a max depth of 2
if max_depth == 1 and parent:
return
- max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
- lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
+ max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
+ int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)
continuation = self._extract_continuation(root_continuation_data)
note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
tracker['current_page_thread'], comment_prog_str)
else:
- note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
+ note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
' ' if parent else '', ' replies' if parent else '',
page_num, comment_prog_str)
check_get_keys = None
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
check_get_keys = [[*continuation_items_path, ..., (
- 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
+ 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
try:
response = self._extract_response(
item_id=None, query=continuation,
# Ignore incomplete data error for replies if retries didn't work.
# This is to allow any other parent comments and comment threads to be downloaded.
# See: https://github.com/yt-dlp/yt-dlp/issues/4669
- if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
- self.report_warning(
- 'Received incomplete data for a comment reply thread and retrying did not help. '
- 'Ignoring to let other comments be downloaded.')
- else:
- raise
+ if 'incomplete data' in str(e).lower() and parent:
+ if self.get_param('ignoreerrors') in (True, 'only_download'):
+ self.report_warning(
+ 'Received incomplete data for a comment reply thread and retrying did not help. '
+ 'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
+ return
+ else:
+ raise ExtractorError(
+ 'Incomplete data received for comment reply thread. '
+ 'Pass --ignore-errors to ignore and allow rest of comments to download.',
+ expected=True)
+ raise
is_forced_continuation = False
continuation = None
+ mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
if is_first_continuation:
continuation = extract_header(continuation_items)
break
continue
- for entry in extract_thread(continuation_items):
+ for entry in extract_thread(continuation_items, mutations):
if not entry:
return
yield entry
context['signatureTimestamp'] = sts
return {
'playbackContext': {
- 'contentPlaybackContext': context
+ 'contentPlaybackContext': context,
},
- **cls._get_checkok_params()
+ **cls._get_checkok_params(),
}
@staticmethod
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
- _STORY_PLAYER_PARAMS = '8AEB'
-
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
yt_query = {
'videoId': video_id,
}
- if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
- yt_query['params'] = self._STORY_PLAYER_PARAMS
+
+ pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp_arg:
+ yt_query['params'] = pp_arg
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
ytcfg=player_ytcfg, headers=headers, fatal=True,
default_client=client,
- note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
+ note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
) or None
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
- default = ['android', 'web']
+ android_clients = []
+ default = ['ios', 'web']
allowed_clients = sorted(
- (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
+ (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
for client in self._configuration_arg('player_client'):
- if client in allowed_clients:
- requested_clients.append(client)
- elif client == 'default':
+ if client == 'default':
requested_clients.extend(default)
elif client == 'all':
requested_clients.extend(allowed_clients)
- else:
+ elif client not in allowed_clients:
self.report_warning(f'Skipping unsupported client {client}')
+ elif client.startswith('android'):
+ android_clients.append(client)
+ else:
+ requested_clients.append(client)
+ # Force deprioritization of broken Android clients for format de-duplication
+ requested_clients.extend(android_clients)
if not requested_clients:
requested_clients = default
return orderedSet(requested_clients)
+ def _invalid_player_response(self, pr, video_id):
+ """Return the video id found in `pr` when it differs from `video_id`.
+
+ Reads `videoDetails.videoId` from the player response `pr`. When it equals
+ `video_id` the function falls through and implicitly returns None. Callers in
+ this file use the result as a truthiness check, so a missing id (presumably
+ yielding None from traverse_obj) is also treated as "not invalid".
+ """
+ # YouTube may return a different video player response than expected.
+ # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
+ if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
+ return pr_id
+
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None
if webpage:
initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
+ prs = []
+ if initial_pr and not self._invalid_player_response(initial_pr, video_id):
+ # Android player_response does not have microFormats which are needed for
+ # extraction of some data. So we return the initial_pr with formats
+ # stripped out even if not requested by the user
+ # See: https://github.com/yt-dlp/yt-dlp/issues/501
+ prs.append({**initial_pr, 'streamingData': None})
+
all_clients = set(clients)
clients = clients[::-1]
- prs = []
def append_client(*client_names):
""" Append the first client name that exists but not already used """
all_clients.add(actual_client)
return
- # Android player_response does not have microFormats which are needed for
- # extraction of some data. So we return the initial_pr with formats
- # stripped out even if not requested by the user
- # See: https://github.com/yt-dlp/yt-dlp/issues/501
- if initial_pr:
- pr = dict(initial_pr)
- pr['streamingData'] = None
- prs.append(pr)
-
- last_error = None
tried_iframe_fallback = False
player_url = None
+ skipped_clients = {}
while clients:
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {}
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e:
- if last_error:
- self.report_warning(last_error)
- last_error = e
+ self.report_warning(e)
continue
- if pr:
- # YouTube may return a different video player response than expected.
- # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
- pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
- if pr_video_id and pr_video_id != video_id:
- self.report_warning(
- f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
- else:
- # Save client name for introspection later
- name = short_client_name(client)
- sd = traverse_obj(pr, ('streamingData', {dict})) or {}
- sd[STREAMING_DATA_CLIENT_NAME] = name
- for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
- f[STREAMING_DATA_CLIENT_NAME] = name
- prs.append(pr)
+ if pr_id := self._invalid_player_response(pr, video_id):
+ skipped_clients[client] = pr_id
+ elif pr:
+ # Save client name for introspection later
+ name = short_client_name(client)
+ sd = traverse_obj(pr, ('streamingData', {dict})) or {}
+ sd[STREAMING_DATA_CLIENT_NAME] = name
+ for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
+ f[STREAMING_DATA_CLIENT_NAME] = name
+ prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
- if last_error:
- if not len(prs):
- raise last_error
- self.report_warning(last_error)
+ if skipped_clients:
+ self.report_warning(
+ f'Skipping player responses from {"/".join(skipped_clients)} clients '
+ f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
+ if not prs:
+ raise ExtractorError(
+ 'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
+ elif not prs:
+ raise ExtractorError('Failed to extract any player response')
return prs, player_url
def _needs_live_processing(self, live_status, duration):
if (live_status == 'is_live' and self.get_param('live_from_start')
- or live_status == 'post_live' and (duration or 0) > 4 * 3600):
+ or live_status == 'post_live' and (duration or 0) > 2 * 3600):
return live_status
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
CHUNK_SIZE = 10 << 20
+ PREFERRED_LANG_VALUE = 10
+ original_language = None
itags, stream_ids = collections.defaultdict(set), []
itag_qualities, res_qualities = {}, {0: None}
q = qualities([
# audio-only formats with unknown quality may get tagged as tiny
'tiny',
'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
- 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
+ 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
- all_formats = self._configuration_arg('include_duplicate_formats')
+ format_types = self._configuration_arg('formats')
+ all_formats = 'duplicate' in format_types
+ if self._configuration_arg('include_duplicate_formats'):
+ all_formats = True
+ self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
+ 'Use formats=duplicate extractor argument instead')
def build_fragments(f):
return LazyList({
'url': update_url_query(f['url'], {
- 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}'
- })
+ 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
+ }),
} for range_start in range(0, f['filesize'], CHUNK_SIZE))
for fmt in streaming_formats:
itag_qualities[itag] = quality
if height:
res_qualities[height] = quality
+
+ is_default = audio_track.get('audioIsDefault')
+ is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
+ language_code = audio_track.get('id', '').split('.')[0]
+ if language_code and is_default:
+ original_language = language_code
+
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragment that would subsequently requested with (`&sq=N`)
if not all((sc, fmt_url, player_url, encrypted_sig)):
continue
try:
- fmt_url += '&%s=%s' % (
+ fmt_url += '&{}={}'.format(
traverse_obj(sc, ('sp', -1)) or 'signature',
- self._decrypt_signature(encrypted_sig, video_id, player_url)
+ self._decrypt_signature(encrypted_sig, video_id, player_url),
)
except ExtractorError as e:
self.report_warning('Signature extraction failed: Some formats may be missing',
continue
query = parse_qs(fmt_url)
- throttled = False
if query.get('n'):
try:
decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
fmt_url = update_url_query(fmt_url, {
- 'n': decrypt_nsig(query['n'][0], video_id, player_url)
+ 'n': decrypt_nsig(query['n'][0], video_id, player_url),
})
except ExtractorError as e:
phantomjs_hint = ''
f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
if player_url:
self.report_warning(
- f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
+ f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
self.write_debug(e, only_once=True)
else:
self.report_warning(
- 'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
+ 'Cannot decrypt nsig without player_url: Some formats may be missing',
video_id=video_id, only_once=True)
- throttled = True
+ continue
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
- language_preference = (
- 10 if audio_track.get('audioIsDefault') and 10
- else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
- else -1)
+ format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
# Make sure to avoid false positives with small duration differences.
# E.g. __2ABJjxzNo, ySuUZEjARPY
- is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
+ is_damaged = try_call(lambda: format_duration < duration // 2)
if is_damaged:
self.report_warning(
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+ # Android client formats are broken due to integrity check enforcement
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
+ is_broken = client_name and client_name.startswith(short_client_name('android'))
+ if is_broken:
+ self.report_warning(
+ f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
+ 'They will be deprioritized', only_once=True)
+
+ name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
+ fps = int_or_none(fmt.get('fps')) or 0
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
'format_note': join_nonempty(
- join_nonempty(audio_track.get('displayName'),
- language_preference > 0 and ' (default)', delim=''),
- fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
- fmt.get('isDrc') and 'DRC',
+ join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
+ name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
- throttled and 'THROTTLED', is_damaged and 'DAMAGED',
+ is_damaged and 'DAMAGED', is_broken and 'BROKEN',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
- 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
- 'fps': int_or_none(fmt.get('fps')) or None,
+ 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
+ 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
'has_drm': bool(fmt.get('drmFamilies')),
'tbr': tbr,
+ 'filesize_approx': filesize_from_tbr(tbr, format_duration),
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
- 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
- 'desc' if language_preference < -1 else '') or None,
- 'language_preference': language_preference,
- # Strictly de-prioritize damaged and 3gp formats
- 'preference': -10 if is_damaged else -2 if itag == '17' else None,
+ 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
+ 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
+ # Strictly de-prioritize broken, damaged and 3gp formats
+ 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
if single_stream and dct.get('ext'):
dct['container'] = dct['ext'] + '_dash'
- if all_formats and dct['filesize']:
+ if (all_formats or 'dashy' in format_types) and dct['filesize']:
yield {
**dct,
'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
'protocol': 'http_dash_segments',
'fragments': build_fragments(dct),
}
- dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
- yield dct
+ if all_formats or 'dashy' not in format_types:
+ dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
+ yield dct
needs_live_processing = self._needs_live_processing(live_status, duration)
- skip_bad_formats = not self._configuration_arg('include_incomplete_formats')
+ skip_bad_formats = 'incomplete' not in format_types
+ if self._configuration_arg('include_incomplete_formats'):
+ skip_bad_formats = False
+ self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
skip_manifests = set(self._configuration_arg('skip'))
if (not self.get_param('youtube_include_hls_manifest', True)
skip_manifests.add('dash')
if self._configuration_arg('include_live_dash'):
self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
- 'Use include_incomplete_formats extractor argument instead')
+ 'Use formats=incomplete extractor argument instead')
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
skip_manifests.add('dash')
elif itag:
f['format_id'] = itag
+ if original_language and f.get('language') == original_language:
+ f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
+ f['language_preference'] = PREFERRED_LANG_VALUE
+
+ if f.get('source_preference') is None:
+ f['source_preference'] = -1
+
+ if itag in ('616', '235'):
+ f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+ f['source_preference'] += 100
+
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
- if self.get_param('verbose'):
+ if self.get_param('verbose') or all_formats:
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
+ if f.get('fps') and f['fps'] <= 1:
+ del f['fps']
+
+ if proto == 'hls' and f.get('has_drm'):
+ f['has_drm'] = 'maybe'
+ f['source_preference'] -= 5
return True
subtitles = {}
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
- if smuggled_data.get('is_story'):
- query['pp'] = self._STORY_PLAYER_PARAMS
+ pp = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp:
+ query['pp'] = pp
webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query)
else None)
streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
*formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+ if all(f.get('has_drm') for f in formats):
+ # If there are no formats that definitely don't have DRM, all have DRM
+ for f in formats:
+ f['has_drm'] = True
return live_broadcast_details, live_status, streaming_data, formats, subtitles
expected_type=str)
if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
if self.get_param('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
else:
entries = []
feed_ids = []
feed_title = feed_entry('title')
title = video_title
if feed_title:
- title += ' (%s)' % feed_title
+ title += f' ({feed_title})'
entries.append({
'_type': 'url_transparent',
'ie_key': 'Youtube',
'url': smuggle_url(
- '%swatch?v=%s' % (base_url, feed_data['id'][0]),
+ '{}watch?v={}'.format(base_url, feed_data['id'][0]),
{'force_singlefeed': True}),
'title': title,
})
feed_ids.append(feed_id)
self.to_screen(
- 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
- % (', '.join(feed_ids), video_id))
+ 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
+ ', '.join(feed_ids), video_id))
return self.playlist_result(
entries, video_id, video_title, video_description)
# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
# in resolution, these are not the custom thumbnail. So de-prioritize them
'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
- 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
+ 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
]
n_thumbnail_names = len(thumbnail_names)
thumbnails.extend({
for fmt in filter(is_bad_format, formats):
fmt['preference'] = (fmt.get('preference') or -1) - 10
- fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
+ fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
if needs_live_processing:
self._prepare_live_from_start_formats(
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status,
'release_timestamp': live_start_time,
- '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
- 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
+ '_format_sort_fields': ( # source_preference is lower for potentially damaged formats
+ 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
}
subtitles = {}
continue
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s')
- # Add an "-orig" label to the original language so that it can be distinguished.
- # The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
+ # Set audio language based on original subtitles
+ for f in formats:
+ if f.get('acodec') != 'none' and not f.get('language'):
+ f['language'] = orig_trans_code
+ # Add an "-orig" label to the original language so that it can be distinguished.
+ # The subs are returned without "-orig" as well for compatibility
process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
# Setting tlang=lang returns damaged subtitles.
for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
d_k += '_time'
if d_k not in info and k in s_ks:
- info[d_k] = parse_duration(query[k][0])
+ info[d_k] = parse_duration(v[0])
# Youtube Music Auto-generated description
- if video_description:
+ if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
+ # XXX: Causes catastrophic backtracking if description has "·"
+ # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
+ # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
+ # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
mobj = re.search(
r'''(?xs)
- (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
- (?P<album>[^\n]+)
+ (?=(?P<track>[^\n·]+))(?P=track)·
+ (?=(?P<artist>[^\n]+))(?P=artist)\n+
+ (?=(?P<album>[^\n]+))(?P=album)\n
(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
- (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
- .+\nAuto-generated\ by\ YouTube\.\s*$
+ (.+?\nArtist\s*:\s*
+ (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
+ )?.+\nAuto-generated\ by\ YouTube\.\s*$
''', video_description)
if mobj:
release_year = mobj.group('release_year')
release_year = release_date[:4]
info.update({
'album': mobj.group('album'.strip()),
- 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
+ 'artists': ([a] if (a := mobj.group('clean_artist'))
+ else [a.strip() for a in mobj.group('artist').split('·')]),
'track': mobj.group('track').strip(),
'release_date': release_date,
'release_year': int_or_none(release_year),
info['comment_count'] = traverse_obj(initial_data, (
'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
- 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount'
+ 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
), (
'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
- 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo'
+ 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
), expected_type=self._get_count, get_all=False)
try: # This will error if there is no livechat
or self._extract_chapters_from_description(video_description, duration)
or None)
- info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
+ info['heatmap'] = self._extract_heatmap(initial_data)
contents = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
if mobj:
info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
break
- sbr_tooltip = try_get(
- vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
- if sbr_tooltip:
- like_count, dislike_count = sbr_tooltip.split(' / ')
- info.update({
- 'like_count': str_to_int(like_count),
- 'dislike_count': str_to_int(dislike_count),
- })
+
+ info['like_count'] = traverse_obj(vpir, (
+ 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
+ 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
+ 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
+ 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
+
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
if vcr:
vc = self._get_count(vcr, 'viewCount')
if mrr_title == 'Album':
info['album'] = mrr_contents_text
elif mrr_title == 'Artist':
- info['artist'] = mrr_contents_text
+ info['artists'] = [mrr_contents_text] if mrr_contents_text else None
elif mrr_title == 'Song':
info['track'] = mrr_contents_text
+ owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
+ if self._has_badge(owner_badges, BadgeType.VERIFIED):
+ info['channel_is_verified'] = True
info.update({
'uploader': info.get('channel'),
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})
+
+ # We only want timestamp IF it has time precision AND a timezone
+ # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
+ timestamp = (
+ parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
+ or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
+ )
+ upload_date = (
+ dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
+ (
+ unified_strdate(get_first(microformats, 'uploadDate'))
+ or unified_strdate(search_meta('uploadDate'))
+ ))
+
+ # In the case we cannot get the timestamp:
# The upload date for scheduled, live and past live streams / premieres in microformats
# may be different from the stream date. Although not in UTC, we will prefer it in this case.
# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
- upload_date = (
- unified_strdate(get_first(microformats, 'uploadDate'))
- or unified_strdate(search_meta('uploadDate')))
- if not upload_date or (
- live_status in ('not_live', None)
- and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
- ):
+ if not upload_date or (not timestamp and live_status in ('not_live', None)):
+ # this should be in UTC, as configured in the cookie/client context
upload_date = strftime_or_none(
- self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
+ self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
+
info['upload_date'] = upload_date
+ info['timestamp'] = timestamp
- for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
+ if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
+ # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
+ upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
+ if upload_datetime >= datetime_from_str('today-2days'):
+ for fmt in info['formats']:
+ if fmt.get('protocol') == 'm3u8_native':
+ fmt['__needs_testing'] = True
+
+ for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
v = info.get(s_k)
if v:
info[d_k] = v
- badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
+ badges = self._extract_badges(traverse_obj(vpir, 'badges'))
is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
or get_first(video_details, 'isPrivate', expected_type=bool))
def _extract_basic_item_renderer(item):
# Modified from _extract_grid_item_renderer
known_basic_renderers = (
- 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
+ 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
)
for key, renderer in item.items():
if not isinstance(renderer, dict):
channel_id = self.ucid_or_none(renderer['channelId'])
title = self._get_text(renderer, 'title')
channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
- # As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
- # However we can expect them to change that in the future.
channel_handle = self.handle_from_url(
traverse_obj(renderer, (
'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
('browseEndpoint', 'canonicalBaseUrl')),
{str}), get_all=False))
+ if not channel_handle:
+ # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
+ channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
return {
'_type': 'url',
'url': channel_url,
'title': title,
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
- 'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
+ # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
+ # However, in feed/channels this is set correctly to the subscriber count
+ 'channel_follower_count': traverse_obj(
+ renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
- 'playlist_count': self._get_count(renderer, 'videoCountText'),
+ 'playlist_count': (
+ # videoCountText may be the subscriber count
+ self._get_count(renderer, 'videoCountText')
+ if self._get_count(renderer, 'subscriberCountText') is not None else None),
'description': self._get_text(renderer, 'descriptionSnippet'),
+ 'channel_is_verified': True if self._has_badge(
+ self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
}
def _grid_entries(self, grid_renderer):
playlist_id = renderer.get('playlistId')
if playlist_id:
yield self.url_result(
- 'https://www.youtube.com/playlist?list=%s' % playlist_id,
+ f'https://www.youtube.com/playlist?list={playlist_id}',
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
continue
yield from self._grid_entries(renderer)
renderer = content.get('horizontalListRenderer')
if renderer:
- # TODO
+ # TODO: handle horizontalListRenderer
pass
def _shelf_entries(self, shelf_renderer, skip_channels=False):
post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
if playlist_id:
yield self.url_result(
- 'https://www.youtube.com/playlist?list=%s' % playlist_id,
+ f'https://www.youtube.com/playlist?list={playlist_id}',
ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
# inline video links
runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
'videoRenderer': lambda x: [self._video_entry(x)],
'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
- 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
+ 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
+ 'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
}
for key, renderer in isr_content.items():
if key not in known_renderers:
or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
yield from extract_entries(parent_renderer)
continuation = continuation_list[0]
-
+ seen_continuations = set()
for page_num in itertools.count(1):
if not continuation:
break
+ continuation_token = continuation.get('continuation')
+ if continuation_token is not None and continuation_token in seen_continuations:
+ self.write_debug('Detected YouTube feed looping - assuming end of feed.')
+ break
+ seen_continuations.add(continuation_token)
headers = self.generate_api_headers(
ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
response = self._extract_response(
continuation_items = traverse_obj(response, (
('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
- 'appendContinuationItemsAction', 'continuationItems'
+ 'appendContinuationItemsAction', 'continuationItems',
), 'continuationContents', get_all=False)
continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})
video_items_renderer = None
- for key in continuation_item.keys():
+ for key in continuation_item:
if key not in known_renderers:
continue
func, parent_key = known_renderers[key]
else:
metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
+ # pageHeaderViewModel slow rollout began April 2024
+ page_header_view_model = traverse_obj(data, (
+ 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))
+
# We can get the uncropped banner/avatar by replacing the crop params with '=s0'
# See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
def _get_uncropped(url):
avatar_thumbnails.append({
'url': uncropped_avatar,
'id': 'avatar_uncropped',
- 'preference': 1
+ 'preference': 1,
})
- channel_banners = self._extract_thumbnails(
- data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
+ channel_banners = (
+ self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
+ or self._extract_thumbnails(
+ page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
for banner in channel_banners:
banner['preference'] = -10
channel_banners.append({
'url': uncropped_banner,
'id': 'banner_uncropped',
- 'preference': -5
+ 'preference': -5,
})
# Deprecated - remove primary_sidebar_renderer when layout discontinued
or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
or info['id']),
'availability': self._extract_availability(data),
- 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
+ 'channel_follower_count': (
+ self._get_count(data, ('header', ..., 'subscriberCountText'))
+ or traverse_obj(page_header_view_model, (
+ 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
+ lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
- 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
+ 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
+ or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
})
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})
+
+ channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
+ if self._has_badge(channel_badges, BadgeType.VERIFIED):
+ info['channel_is_verified'] = True
# Playlist stats is a text runs array containing [video count, view count, last updated].
# last updated or (view count and last updated) may be missing.
playlist_stats = get_first(
last_updated_unix = self._parse_time_text(
self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
- info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
+ info['modified_date'] = strftime_or_none(last_updated_unix)
info['view_count'] = self._get_count(playlist_stats, 1)
if info['view_count'] is None: # 0 is allowed
info.update({
'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
- 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')))
+ 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
})
info.update({
'playlistId': playlist_id,
'videoId': watch_endpoint.get('videoId') or last_id,
'index': watch_endpoint.get('index') or len(videos),
- 'params': watch_endpoint.get('params') or 'OAE%3D'
+ 'params': watch_endpoint.get('params') or 'OAE%3D',
}
response = self._extract_response(
- item_id='%s page %d' % (playlist_id, page_num),
+ item_id=f'{playlist_id} page {page_num}',
query=query, ep='next', headers=headers, ytcfg=ytcfg,
- check_get_keys='contents'
+ check_get_keys='contents',
)
playlist = try_get(
response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
player_header_privacy = playlist_header_renderer.get('privacy')
- badges = self._extract_badges(sidebar_renderer)
+ badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
privacy_setting_icon = get_first(
visitor_data=self._extract_visitor_data(data, ytcfg))
query = {
'params': 'wgYCCAA=',
- 'browseId': f'VL{item_id}'
+ 'browseId': f'VL{item_id}',
}
return self._extract_response(
item_id=item_id, headers=headers, query=query,
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
retry.error = e
continue
self._error_or_warning(e, fatal=fatal)
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
retry.error = ExtractorError('Incomplete yt initial data received')
+ data = None
continue
return webpage, data
(?!consent\.)(?:\w+\.)?
(?:
youtube(?:kids)?\.com|
- %(invidious)s
+ {invidious}
)/
(?:
(?P<channel_type>channel|c|user|browse)/|
feed/|hashtag/|
(?:playlist|watch)\?.*?\blist=
)|
- (?!(?:%(reserved_names)s)\b) # Direct URLs
+ (?!(?:{reserved_names})\b) # Direct URLs
)
(?P<id>[^/?\#&]+)
- )''' % {
- 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
- 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
- }
+ )'''.format(
+ reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
+ invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
+ )
IE_NAME = 'youtube:tab'
_TESTS = [{
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'title': 'Igor Kleiner - Playlists',
- 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
- 'uploader': 'Igor Kleiner',
+ 'title': 'Igor Kleiner Ph.D. - Playlists',
+ 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
+ 'uploader': 'Igor Kleiner Ph.D.',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
- 'channel': 'Igor Kleiner',
+ 'channel': 'Igor Kleiner Ph.D.',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
+ 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
}, {
'note': 'playlists, multipage, different order',
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'title': 'Igor Kleiner - Playlists',
- 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
- 'uploader': 'Igor Kleiner',
+ 'title': 'Igor Kleiner Ph.D. - Playlists',
+ 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
+ 'uploader': 'Igor Kleiner Ph.D.',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
- 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
+ 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'channel': 'Igor Kleiner',
+ 'channel': 'Igor Kleiner Ph.D.',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
}, {
'note': 'playlists, series',
'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Playlists',
- 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown',
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader': '3Blue1Brown',
'tags': ['Mathematics'],
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'channel_is_verified': True,
},
}, {
'note': 'playlists, singlepage',
'uploader_id': '@ThirstForScience',
'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
- 'tags': 'count:13',
+ 'tags': 'count:12',
'channel': 'ThirstForScience',
- 'channel_follower_count': int
- }
+ 'channel_follower_count': int,
+ },
}, {
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True,
'tags': [],
'channel': 'Sergey M.',
'description': '',
- 'modified_date': '20160902',
+ 'modified_date': '20230921',
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
- 'availability': 'public',
+ 'availability': 'unlisted',
'uploader_url': 'https://www.youtube.com/@sergeym.6173',
'uploader_id': '@sergeym.6173',
'uploader': 'Sergey M.',
'uploader_url': 'https://www.youtube.com/@lexwill718',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'playlist_mincount': 2,
}, {
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'uploader_url': 'https://www.youtube.com/@lexwill718',
'channel': 'lex will',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'playlist_mincount': 975,
}, {
'channel': 'lex will',
'tags': ['bible', 'history', 'prophesy'],
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'playlist_mincount': 199,
}, {
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'tags': ['bible', 'history', 'prophesy'],
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'playlist_mincount': 17,
}, {
'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Search - linear algebra',
- 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'tags': ['Mathematics'],
'channel': '3Blue1Brown',
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
+ 'channel_is_verified': True,
},
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
- 'id': 'AlTsmyW4auo', # This will keep changing
+ 'id': 'hGkQjiJLjWQ', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
'uploader_url': 'https://www.youtube.com/@SkyNews',
'uploader_id': '@SkyNews',
'uploader': 'Sky News',
+ 'channel_is_verified': True,
},
'params': {
'skip_download': True,
'url': 'https://www.youtube.com/hashtag/cctv9',
'info_dict': {
'id': 'cctv9',
- 'title': '#cctv9',
+ 'title': 'cctv9 - All',
'tags': [],
},
'playlist_mincount': 300, # not consistent but should be over 300
}, {
'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
- 'only_matching': True
+ 'only_matching': True,
}, {
'note': '/browse/ should redirect to /channel/',
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
- 'only_matching': True
+ 'only_matching': True,
}, {
'note': 'VLPL, should redirect to playlist?list=PL...',
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
'uploader_id': '@PhilippHagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader': 'Philipp Hagemeister',
- }
+ },
}],
'playlist_count': 1,
'params': {'extract_flat': True},
'playlist_mincount': 50,
'params': {
'skip_download': True,
- 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
},
}, {
'note': 'API Fallback: /videos tab, sorted by oldest first',
'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
'tags': [],
'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'playlist_mincount': 650,
'params': {
'skip_download': True,
- 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
},
'skip': 'Query for sorting no longer works',
}, {
'playlist_mincount': 101,
'params': {
'skip_download': True,
- 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
},
'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
}, {
'note': 'non-standard redirect to regional channel',
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
- 'only_matching': True
+ 'only_matching': True,
}, {
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'uploader_id': '@pukkandan',
'uploader': 'pukkandan',
},
- 'playlist_mincount': 2
+ 'playlist_mincount': 2,
}, {
'note': 'translated tab name',
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
'channel_follower_count': int,
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
- 'description': 'md5:e56b74b5bb7e9c701522162e9abfb822',
+ 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
'channel': 'Polka Ch. 尾丸ポルカ',
'tags': 'count:35',
'uploader_url': 'https://www.youtube.com/@OmaruPolka',
'uploader': 'Polka Ch. 尾丸ポルカ',
'uploader_id': '@OmaruPolka',
+ 'channel_is_verified': True,
},
'playlist_count': 3,
}, {
'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A',
'title': 'Not Just Bikes - Shorts',
- 'tags': 'count:12',
+ 'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
- 'description': 'md5:26bc55af26855a608a5cf89dfa595c8d',
+ 'description': 'md5:5e82545b3a041345927a92d0585df247',
'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes',
'uploader_url': 'https://www.youtube.com/@NotJustBikes',
'uploader': 'Not Just Bikes',
'uploader_id': '@NotJustBikes',
+ 'channel_is_verified': True,
},
'playlist_mincount': 10,
}, {
# No uploads and no UCID given. Should fail with no uploads error
# See test_youtube_lists
'url': 'https://www.youtube.com/news',
- 'only_matching': True
+ 'only_matching': True,
}, {
# No videos tab but has a shorts tab
'url': 'https://www.youtube.com/c/TKFShorts',
'title': 'SHORT short',
'view_count': int,
'thumbnails': list,
- }
+ },
}],
'params': {'extract_flat': True},
}, {
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
- 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
- 'tags': []
+ 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
+ 'tags': [],
},
'playlist': [{
'info_dict': {
'channel': str,
'uploader': str,
'uploader_url': str,
- 'uploader_id': str
- }
+ 'uploader_id': str,
+ 'channel_is_verified': bool, # this will keep changing
+ },
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
- 'playlist_mincount': 1
+ 'playlist_mincount': 1,
}, {
# Channel renderer metadata. Contains number of videos on the channel
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'uploader': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/@PewDiePie',
'uploader_id': '@PewDiePie',
- }
+ 'channel_is_verified': True,
+ },
}],
'params': {'extract_flat': True},
}, {
'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': {
- 'id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'id': '@3blue1brown',
'tags': ['Mathematics'],
- 'title': '3Blue1Brown - About',
+ 'title': '3Blue1Brown',
'channel_follower_count': int,
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown',
- 'view_count': int,
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
- 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
+ 'channel_is_verified': True,
},
'playlist_count': 0,
}, {
'channel': '99 Percent Invisible',
'uploader_id': '@99percentinvisiblepodcast',
},
- 'playlist_count': 1,
+ 'playlist_count': 0,
}, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
'url': 'https://www.youtube.com/@AHimitsu/releases',
'uploader_id': '@AHimitsu',
'uploader': 'A Himitsu',
'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
- 'tags': 'count:16',
+ 'tags': 'count:12',
'description': 'I make music',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
+ 'channel_is_verified': True,
},
'playlist_mincount': 10,
+ }, {
+ # Playlist with only shorts, shown as reel renderers
+ # FIXME: YouTube currently doesn't give a continuation for this,
+ # but may do so in the future.
+ 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
+ 'info_dict': {
+ 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
+ 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
+ 'view_count': int,
+ 'uploader_id': '@BangyShorts',
+ 'description': '',
+ 'uploader_url': 'https://www.youtube.com/@BangyShorts',
+ 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
+ 'channel': 'Bangy Shorts',
+ 'uploader': 'Bangy Shorts',
+ 'tags': [],
+ 'availability': 'public',
+ 'modified_date': r're:\d{8}',
+ 'title': 'Uploads from Bangy Shorts',
+ },
+ 'playlist_mincount': 100,
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'note': 'Tags containing spaces',
+ 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
+ 'channel': 'Markiplier',
+ 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
+ 'title': 'Markiplier',
+ 'channel_follower_count': int,
+ 'description': 'md5:0c010910558658824402809750dc5d97',
+ 'uploader_id': '@markiplier',
+ 'uploader_url': 'https://www.youtube.com/@markiplier',
+ 'uploader': 'Markiplier',
+ 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
+ 'channel_is_verified': True,
+ 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
+ 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
+ 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
+ 'mark fischbach'],
+ },
}]
@classmethod
def _has_tab(self, tabs, tab_id):
# Report whether any entry of `tabs` has `tab_id` as the first element of the
# value returned by `_extract_tab_id_and_name` for that entry.
return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
+ def _empty_playlist(self, item_id, data):  # playlist result for `item_id` with an empty entry list
+ return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))  # result of `_extract_metadata_from_tabs` is passed as keyword arguments
+
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
# Handle both video/playlist URLs
qs = parse_qs(url)
- video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
+ video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
if not video_id and mobj['not_channel'].startswith('watch'):
if not playlist_id:
# If there is neither a video ID nor a playlist ID, YouTube redirects to the home page, which is undesirable
selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
+ # /about is no longer a tab
+ if original_tab_id == 'about':
+ return self._empty_playlist(item_id, data)
+
if not original_tab_id and selected_tab_name:
self.to_screen('Downloading all uploads of the channel. '
'To download only the videos in a specific tab, pass the tab\'s URL')
if not extra_tabs and selected_tab_id != 'videos':
# Channel does not have streams, shorts or videos tabs
if item_id[:2] != 'UC':
- raise ExtractorError('This channel has no uploads', expected=True)
+ return self._empty_playlist(item_id, data)
# Topic channels don't have /videos. Use the equivalent playlist instead
pl_id = f'UU{item_id[2:]}'
try:
data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
except ExtractorError:
- raise ExtractorError('This channel has no uploads', expected=True)
+ return self._empty_playlist(item_id, data)
else:
item_id, url = pl_id, pl_url
self.to_screen(
(?:
(?:
youtube(?:kids)?\.com|
- %(invidious)s
+ {invidious}
)
/.*?\?.*?\blist=
)?
- (?P<id>%(playlist_id)s)
- )''' % {
- 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
- 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
- }
+ (?P<id>{playlist_id})
+ )'''.format(
+ playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
+ invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
+ )
IE_NAME = 'youtube:playlist'
_TESTS = [{
'note': 'issue #673',
'uploader_url': 'https://www.youtube.com/@milan5503',
'availability': 'public',
},
- 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
+ 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
}, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 455,
class YoutubeYtBeIE(InfoExtractor):
IE_DESC = 'youtu.be'
- _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
_TESTS = [{
'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
'info_dict': {
'availability': 'public',
'duration': 59,
'comment_count': int,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
},
'params': {
'noplaylist': True,
IE_DESC = 'YouTube search'
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
- _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
+ _SEARCH_PARAMS = 'EgIQAfABAQ==' # Videos only
_TESTS = [{
'url': 'ytsearch5:youtube-dl test video',
'playlist_count': 5,
'info_dict': {
'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
- }
+ },
+ }, {
+ 'note': 'Suicide/self-harm search warning',
+ 'url': 'ytsearch1:i hate myself and i wanna die',
+ 'playlist_count': 1,
+ 'info_dict': {
+ 'id': 'i hate myself and i wanna die',
+ 'title': 'i hate myself and i wanna die',
+ },
}]
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube search, newest videos first'
- _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
+ _SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date
_TESTS = [{
'url': 'ytsearchdate5:youtube-dl test video',
'playlist_count': 5,
'info_dict': {
'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
- }
+ },
}]
'info_dict': {
'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
- }
+ },
}, {
'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
'playlist_mincount': 5,
'info_dict': {
'id': 'python',
'title': 'python',
- }
+ },
}, {
'url': 'https://www.youtube.com/results?search_query=%23cats',
'playlist_mincount': 1,
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt – In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
- 'playlist_count': int, # XXX: should have a way of saying > 1
+ # No longer available for search as it is set to the handle.
+ # 'playlist_count': int,
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list,
'uploader_id': '@kurzgesagt',
'uploader_url': 'https://www.youtube.com/@kurzgesagt',
'uploader': 'Kurzgesagt – In a Nutshell',
- }
+ 'channel_is_verified': True,
+ 'channel_follower_count': int,
+ },
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
'playlist_mincount': 1,
'info_dict': {
'id': 'royalty free music',
'title': 'royalty free music',
- }
+ },
}, {
'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
'playlist_mincount': 30,
'id': 'royalty free music - songs',
'title': 'royalty free music - songs',
},
- 'params': {'extract_flat': 'in_playlist'}
+ 'params': {'extract_flat': 'in_playlist'},
}, {
'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
'playlist_mincount': 30,
'id': 'royalty free music - community playlists',
'title': 'royalty free music - community playlists',
},
- 'params': {'extract_flat': 'in_playlist'}
+ 'params': {'extract_flat': 'in_playlist'},
}]
_SECTIONS = {
if params:
section = next((k for k, v in self._SECTIONS.items() if v == params), params)
else:
- section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower()
+ section = urllib.parse.unquote_plus(([*url.split('#'), ''])[1]).lower()
params = self._SECTIONS.get(section)
if not params:
section = None
YoutubeBaseInfoExtractor._check_login_required(self)
@classproperty
- def IE_NAME(self):
- return f'youtube:{self._FEED_NAME}'
+ def IE_NAME(cls):
+ # Extractor name is 'youtube:' followed by the _FEED_NAME read off `cls`
+ return f'youtube:{cls._FEED_NAME}'
def _real_extract(self, url):
return self.url_result(
}]
-class YoutubeStoriesIE(InfoExtractor):
- IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
- IE_NAME = 'youtube:stories'
- _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
- _TESTS = [{
- 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = f'RLTD{self._match_id(url)}'
- return self.url_result(
- smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
- ie=YoutubeTabIE, video_id=playlist_id)
-
-
class YoutubeShortsAudioPivotIE(InfoExtractor):
IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
IE_NAME = 'youtube:shorts:pivot:audio'
'live_status': 'not_live',
'channel_follower_count': int,
'chapters': 'count:20',
- }
+ 'comment_count': int,
+ 'heatmap': 'count:100',
+ },
}]
def _real_extract(self, url):
'channel': 'さなちゃんねる',
'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
'uploader': 'さなちゃんねる',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'add_ie': ['Youtube'],
'params': {'skip_download': 'Youtube'},