mimetype2ext,
parse_codecs,
parse_duration,
- # qualities,
+ qualities,
remove_start,
smuggle_url,
str_or_none,
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
_RESERVED_NAMES = (
- r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
+ r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|hashtag|'
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
(?:www\.)?yourepeat\.com/|
tube\.majestyc\.net/|
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
+ (?:www\.)?invidious\.pussthecat\.org/|
+ (?:www\.)?invidious\.048596\.xyz/|
+ (?:www\.)?invidious\.zee\.li/|
+ (?:www\.)?vid\.puffyan\.us/|
+ (?:(?:www|au)\.)?ytprivate\.com/|
+ (?:www\.)?invidious\.namazso\.eu/|
+ (?:www\.)?invidious\.ethibox\.fr/|
+ (?:www\.)?inv\.skyn3t\.in/|
+ (?:www\.)?invidious\.himiko\.cloud/|
+ (?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion/|
+ (?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion/|
+ (?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion/|
+ (?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion/|
(?:(?:www|dev)\.)?invidio\.us/|
(?:(?:www|no)\.)?invidiou\.sh/|
(?:(?:www|fi)\.)?invidious\.snopyta\.org/|
(?:www\.)?yt\.lelux\.fi/|
(?:www\.)?invidious\.ggc-project\.de/|
(?:www\.)?yt\.maisputain\.ovh/|
- (?:www\.)?invidious\.13ad\.de/|
(?:www\.)?invidious\.toot\.koeln/|
(?:www\.)?invidious\.fdn\.fr/|
(?:www\.)?watch\.nettohikari\.com/|
(?(1).+)? # if we found the ID, everything can follow
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_PLAYER_INFO_RE = (
- r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.js$',
+ r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
+ r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
_formats = {
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
+ 'abr': 129.495,
},
'params': {
'youtube_include_dash_manifest': True,
'only_matching': True,
},
{
- # Age-gated video only available with authentication (unavailable
- # via embed page workaround)
- 'url': 'XgnwCQzjau8',
- 'only_matching': True,
+ # https://github.com/ytdl-org/youtube-dl/pull/28094
+ 'url': 'OtqTfy26tG0',
+ 'info_dict': {
+ 'id': 'OtqTfy26tG0',
+ 'ext': 'mp4',
+ 'title': 'Burn Out',
+ 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
+ 'upload_date': '20141120',
+ 'uploader': 'The Cinematic Orchestra - Topic',
+ 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
+ 'artist': 'The Cinematic Orchestra',
+ 'track': 'Burn Out',
+ 'album': 'Every Day',
+            'release_date': None,
+ 'release_year': None,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
},
]
funcname = self._search_regex(
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
+ r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
# Obsolete patterns
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
base_url = self.http_scheme() + '//www.youtube.com/'
- webpage_url = base_url + 'watch?v=' + video_id
+ webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1'
webpage = self._download_webpage(webpage_url, video_id, fatal=False)
player_response = None
formats = []
itags = []
+ itag_qualities = {}
player_url = None
- # q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
+ q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
streaming_data = player_response.get('streamingData') or {}
streaming_formats = streaming_data.get('formats') or []
streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
continue
+ itag = str_or_none(fmt.get('itag'))
+ quality = fmt.get('quality')
+ if itag and quality:
+ itag_qualities[itag] = quality
+            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
+            # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
+            # number of fragments that would subsequently be requested with (`&sq=N`)
+ if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
+ continue
+
fmt_url = fmt.get('url')
if not fmt_url:
sc = compat_parse_qs(fmt.get('signatureCipher'))
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
fmt_url += '&' + sp + '=' + signature
- itag = str_or_none(fmt.get('itag'))
if itag:
itags.append(itag)
- quality = fmt.get('quality')
+ tbr = float_or_none(
+ fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_note': fmt.get('qualityLabel') or quality,
'fps': int_or_none(fmt.get('fps')),
'height': int_or_none(fmt.get('height')),
- # 'quality': q(quality), # This does not correctly reflect the overall quality of the format
- 'tbr': float_or_none(fmt.get(
- 'averageBitrate') or fmt.get('bitrate'), 1000),
+ 'quality': q(quality),
+ 'tbr': tbr,
'url': fmt_url,
'width': fmt.get('width'),
}
if mobj:
dct['ext'] = mimetype2ext(mobj.group(1))
dct.update(parse_codecs(mobj.group(2)))
- if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
+ no_audio = dct.get('acodec') == 'none'
+ no_video = dct.get('vcodec') == 'none'
+ if no_audio:
+ dct['vbr'] = tbr
+ if no_video:
+ dct['abr'] = tbr
+ if no_audio or no_video:
dct['downloader_options'] = {
# Youtube throttles chunks >~10M
'http_chunk_size': 10485760,
if self._downloader.params.get('youtube_include_dash_manifest'):
dash_manifest_url = streaming_data.get('dashManifestUrl')
if dash_manifest_url:
- dash_formats = []
for f in self._extract_mpd_formats(
dash_manifest_url, video_id, fatal=False):
+ itag = f['format_id']
+ if itag in itags:
+ continue
+ if itag in itag_qualities:
+                        # Not actually useful since the sorting is already done with "quality,res,fps,codec"
+ # but kept to maintain feature parity (and code similarity) with youtube-dl
+ # Remove if this causes any issues with sorting in future
+ f['quality'] = q(itag_qualities[itag])
filesize = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url')
or f['url'], 'file size', default=None))
if filesize:
f['filesize'] = filesize
- dash_formats.append(f)
- # Until further investigation prefer DASH formats as non-DASH
- # may not be available (see [1])
- # 1. https://github.com/ytdl-org/youtube-dl/issues/28070
- if dash_formats:
- dash_formats_keys = [f['format_id'] for f in dash_formats]
- formats = [f for f in formats if f['format_id'] not in dash_formats_keys]
- formats.extend(dash_formats)
+ formats.append(f)
if not formats:
- if streaming_data.get('licenseInfos'):
+ if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
raise ExtractorError(
'This video is DRM protected.', expected=True)
pemr = try_get(
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
'track': mobj.group('track').strip(),
'release_date': release_date,
- 'release_year': int(release_year),
+ 'release_year': int_or_none(release_year),
})
initial_data = None
(?:
(?:channel|c|user)/|
(?P<not_channel>
- feed/|
+ feed/|hashtag/|
(?:playlist|watch)\?.*?\blist=
)|
(?!(?:%s)\b) # Direct URLs
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Игорь Клейнер - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ 'uploader': 'Игорь Клейнер',
+ 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
},
}, {
# playlists, multipage, different order
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Игорь Клейнер - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'uploader': 'Игорь Клейнер',
},
}, {
# playlists, singlepage
'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
'title': 'ThirstForScience - Playlists',
'description': 'md5:609399d937ea957b0f53cbffb747a14c',
+ 'uploader': 'ThirstForScience',
+ 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
}
}, {
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Home',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 2,
}, {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 975,
}, {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 199,
}, {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Playlists',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 17,
}, {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Community',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 18,
}, {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
- 'playlist_mincount': 138,
+ 'playlist_mincount': 12,
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True,
'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'uploader': 'Christiaan008',
'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
+ 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
},
'playlist_count': 96,
}, {
'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
'uploader': 'Computerphile',
+ 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
},
'playlist_mincount': 11,
}, {
'info_dict': {
'id': '9Auq9mYxFEE',
'ext': 'mp4',
- 'title': 'Watch Sky News live',
+ 'title': compat_str,
'uploader': 'Sky News',
'uploader_id': 'skynews',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
'upload_date': '20191102',
- 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
+ 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
'categories': ['News & Politics'],
'tags': list,
'like_count': int,
next_continuation = cls._extract_next_continuation_data(renderer)
if next_continuation:
return next_continuation
- contents = renderer.get('contents')
- if not isinstance(contents, list):
- return
+ contents = []
+ for key in ('contents', 'items'):
+ contents.extend(try_get(renderer, lambda x: x[key], list) or [])
for content in contents:
if not isinstance(content, dict):
continue
for isr_content in isr_contents:
if not isinstance(isr_content, dict):
continue
- renderer = isr_content.get('playlistVideoListRenderer')
- if renderer:
- for entry in self._playlist_entries(renderer):
- yield entry
- continuation_list[0] = self._extract_continuation(renderer)
- continue
- renderer = isr_content.get('gridRenderer')
- if renderer:
- for entry in self._grid_entries(renderer):
- yield entry
- continuation_list[0] = self._extract_continuation(renderer)
- continue
- renderer = isr_content.get('shelfRenderer')
- if renderer:
- is_channels_tab = tab.get('title') == 'Channels'
- for entry in self._shelf_entries(renderer, not is_channels_tab):
- yield entry
- continue
- renderer = isr_content.get('backstagePostThreadRenderer')
- if renderer:
- for entry in self._post_thread_entries(renderer):
- yield entry
+
+ known_renderers = {
+ 'playlistVideoListRenderer': self._playlist_entries,
+ 'gridRenderer': self._grid_entries,
+ 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
+ 'backstagePostThreadRenderer': self._post_thread_entries,
+ 'videoRenderer': lambda x: [self._video_entry(x)],
+ }
+ for key, renderer in isr_content.items():
+ if key not in known_renderers:
+ continue
+ for entry in known_renderers[key](renderer):
+ if entry:
+ yield entry
continuation_list[0] = self._extract_continuation(renderer)
- continue
- renderer = isr_content.get('videoRenderer')
- if renderer:
- entry = self._video_entry(renderer)
- if entry:
- yield entry
+ break
if not continuation_list[0]:
continuation_list[0] = self._extract_continuation(is_renderer)
if not response:
break
+ known_continuation_renderers = {
+ 'playlistVideoListContinuation': self._playlist_entries,
+ 'gridContinuation': self._grid_entries,
+ 'itemSectionContinuation': self._post_thread_continuation_entries,
+ 'sectionListContinuation': extract_entries, # for feeds
+ }
continuation_contents = try_get(
- response, lambda x: x['continuationContents'], dict)
- if continuation_contents:
- continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
- if continuation_renderer:
- for entry in self._playlist_entries(continuation_renderer):
- yield entry
- continuation = self._extract_continuation(continuation_renderer)
- continue
- continuation_renderer = continuation_contents.get('gridContinuation')
- if continuation_renderer:
- for entry in self._grid_entries(continuation_renderer):
- yield entry
- continuation = self._extract_continuation(continuation_renderer)
- continue
- continuation_renderer = continuation_contents.get('itemSectionContinuation')
- if continuation_renderer:
- for entry in self._post_thread_continuation_entries(continuation_renderer):
- yield entry
- continuation = self._extract_continuation(continuation_renderer)
- continue
- continuation_renderer = continuation_contents.get('sectionListContinuation') # for feeds
- if continuation_renderer:
- continuation_list = [None]
- for entry in extract_entries(continuation_renderer):
- yield entry
- continuation = continuation_list[0]
+ response, lambda x: x['continuationContents'], dict) or {}
+ continuation_renderer = None
+ for key, value in continuation_contents.items():
+ if key not in known_continuation_renderers:
continue
+ continuation_renderer = value
+ continuation_list = [None]
+ for entry in known_continuation_renderers[key](continuation_renderer):
+ yield entry
+ continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
+ break
+ if continuation_renderer:
+ continue
+ known_renderers = {
+ 'gridPlaylistRenderer': (self._grid_entries, 'items'),
+ 'gridVideoRenderer': (self._grid_entries, 'items'),
+ 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
+ 'itemSectionRenderer': (self._playlist_entries, 'contents'),
+ 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
+ }
continuation_items = try_get(
response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
- if continuation_items:
- continuation_item = continuation_items[0]
- if not isinstance(continuation_item, dict):
- continue
- renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
- if renderer:
- video_list_renderer = {'contents': continuation_items}
- for entry in self._playlist_entries(video_list_renderer):
- yield entry
- continuation = self._extract_continuation(video_list_renderer)
+ continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
+ video_items_renderer = None
+ for key, value in continuation_item.items():
+ if key not in known_renderers:
continue
+ video_items_renderer = {known_renderers[key][1]: continuation_items}
+ continuation_list = [None]
+ for entry in known_renderers[key][0](video_items_renderer):
+ yield entry
+ continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
+ break
+ if video_items_renderer:
+ continue
break
@staticmethod
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
if renderer:
title = renderer.get('title')
- description = renderer.get('description')
+ description = renderer.get('description', '')
playlist_id = channel_id
tags = renderer.get('keywords', '').split()
thumbnails_list = (
try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
- or data['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails']
+ or try_get(
+ data,
+ lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
+ list)
or [])
thumbnails = []
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com searches'
+ IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
_MAX_RESULTS = float('inf')
# So we search through all entries till we find them.
continuation_token = None
for slr_content in slr_contents:
+ if continuation_token is None:
+ continuation_token = try_get(
+ slr_content,
+ lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
+ compat_str)
+
isr_contents = try_get(
slr_content,
lambda x: x['itemSectionRenderer']['contents'],
if total == n:
return
- if continuation_token is None:
- continuation_token = try_get(
- slr_content,
- lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
- compat_str)
-
if not continuation_token:
break
data['continuation'] = continuation_token
class YoutubeSearchURLIE(YoutubeSearchIE):
- IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
+ IE_DESC = 'YouTube.com search URLs'
IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
# _MAX_RESULTS = 100