_RESERVED_NAMES = (
r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
- r'shorts|movies|results|shared|hashtag|trending|explore|feed|feeds|'
+ r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
r'browse|oembed|get_video_info|iframe_api|s/player|'
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
def _extract_n_function_name(self, jscode):
nfunc, idx = self._search_regex(
- r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})(\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
+ r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
return nfunc
tbr = float_or_none(
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
+ language_preference = (
+ 10 if audio_track.get('audioIsDefault') and 10
+ else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
+ else -1)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': itag,
'format_note': join_nonempty(
'%s%s' % (audio_track.get('displayName') or '',
- ' (default)' if audio_track.get('audioIsDefault') else ''),
+ ' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
throttled and 'THROTTLED', delim=', '),
'source_preference': -10 if throttled else -1,
'tbr': tbr,
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
- 'language': audio_track.get('id', '').split('.')[0],
- 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
+ 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
+ 'desc' if language_preference < -1 else ''),
+ 'language_preference': language_preference,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
trans_name += format_field(lang_name, template=' from %s')
process_language(
automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
+ if lang_code == f'a-{trans_code}':
+ process_language(
+ automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {'tlang': trans_code})
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
+ # Decorator for extraction methods with the signature (self, url, smuggled_data).
+ # The returned wrapper takes (self, url): it unsmuggles the URL, passes the
+ # recovered data to the wrapped function, and re-smuggles that data into the
+ # URL of every entry of the returned info dict.
+ @staticmethod
+ def passthrough_smuggled_data(func):
+ # Generator: each entry's 'url' is rewritten in place as the entries are consumed.
+ def _smuggle(entries, smuggled_data):
+ for entry in entries:
+ # TODO: Convert URL to music.youtube instead.
+ # Do we need to passthrough any other smuggled_data?
+ entry['url'] = smuggle_url(entry['url'], smuggled_data)
+ yield entry
+
+ @functools.wraps(func)
+ def wrapper(self, url):
+ # Falls back to an empty dict when the URL carries no smuggled data.
+ url, smuggled_data = unsmuggle_url(url, {})
+ if self.is_music_url(url):
+ smuggled_data['is_music_url'] = True
+ info_dict = func(self, url, smuggled_data)
+ # Entries are only wrapped when there is data to pass on and the result has entries.
+ if smuggled_data and info_dict.get('entries'):
+ info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data)
+ return info_dict
+ return wrapper
+
def _extract_channel_id(self, webpage):
channel_id = self._html_search_meta(
'channelId', webpage, 'channel id', default=None)
raise ExtractorError(err_note, expected=True)
self.report_warning(err_note, item_id)
- @staticmethod
- def _smuggle_data(entries, data):
- for entry in entries:
- if data:
- entry['url'] = smuggle_url(entry['url'], data)
- yield entry
-
_SEARCH_PARAMS = None
- def _search_results(self, query, params=NO_DEFAULT, client=None):
+ def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
data = {'query': query}
if params is NO_DEFAULT:
params = self._SEARCH_PARAMS
data.update(continuation_list[0] or {})
search = self._extract_response(
item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
- default_client=client, check_get_keys=check_get_keys)
+ default_client=default_client, check_get_keys=check_get_keys)
slr_contents = traverse_obj(search, *content_keys)
yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
if not continuation_list[0]:
return False if YoutubeIE.suitable(url) else super(
YoutubeTabIE, cls).suitable(url)
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- if self.is_music_url(url):
- smuggled_data['is_music_url'] = True
- info_dict = self.__real_extract(url, smuggled_data)
- if info_dict.get('entries'):
- info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
- return info_dict
-
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
- def __real_extract(self, url, smuggled_data):
+ @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
+ def _real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
url = compat_urlparse.urlunparse(
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
_SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
- _TESTS = []
+ _TESTS = [{
+ 'url': 'ytsearch5:youtube-dl test video',
+ 'playlist_count': 5,
+ 'info_dict': {
+ 'id': 'youtube-dl test video',
+ 'title': 'youtube-dl test video',
+ }
+ }]
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube search, newest videos first'
_SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
+ _TESTS = [{
+ 'url': 'ytsearchdate5:youtube-dl test video',
+ 'playlist_count': 5,
+ 'info_dict': {
+ 'id': 'youtube-dl test video',
+ 'title': 'youtube-dl test video',
+ }
+ }]
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
IE_DESC = 'YouTube search URLs with sorting and filter support'
IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
- IE_DESC = 'YouTube music search URLs with sorting and filter support'
+ IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
IE_NAME = 'youtube:music:search_url'
_VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
_TESTS = [{
if not params:
section = None
title = join_nonempty(query, section, delim=' - ')
- return self.playlist_result(self._search_results(query, params, client='web_music'), title, title)
+ return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)
-class YoutubeFeedsInfoExtractor(YoutubeTabIE):
+class YoutubeFeedsInfoExtractor(InfoExtractor):
"""
Base class for feed extractors
Subclasses must define the _FEED_NAME property.
def _real_extract(self, url):
return self.url_result(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
- ie=YoutubeTabIE.ie_key())
+ f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
class YoutubeWatchLaterIE(InfoExtractor):