from ..swfinterp import SWFInterpreter
from ..compat import (
compat_chr,
- compat_HTTPError,
compat_kwargs,
compat_parse_qs,
compat_urllib_parse_unquote,
bool_or_none,
clean_html,
error_to_compat_str,
- extract_attributes,
ExtractorError,
float_or_none,
- get_element_by_attribute,
get_element_by_id,
int_or_none,
- js_to_json,
mimetype2ext,
- orderedSet,
parse_codecs,
parse_count,
parse_duration,
unescapeHTML,
unified_strdate,
unsmuggle_url,
+ update_url_query,
uppercase_escape,
url_or_none,
urlencode_postdata,
+ urljoin,
)
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
+ _RESERVED_NAMES = (
+ r'course|embed|watch|w|results|storefront|'
+ r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
+ r'feed/(watch_later|history|subscriptions|library|trending|recommended)')
+
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
- _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
- _INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
- _YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"
+ _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|LL|WL)'
_YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1',
if not self._login():
return
+ _DEFAULT_API_DATA = {
+ 'context': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20201021.03.00',
+ }
+ },
+ }
-class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
-
- def _find_entries_in_json(self, extracted):
- entries = []
- c = {}
-
- def _real_find(obj):
- if obj is None or isinstance(obj, str):
- return
-
- if type(obj) is list:
- for elem in obj:
- _real_find(elem)
-
- if type(obj) is dict:
- if self._is_entry(obj):
- entries.append(obj)
- return
-
- if 'continuationCommand' in obj:
- c['continuation'] = obj
- return
-
- for _, o in obj.items():
- _real_find(o)
-
- _real_find(extracted)
-
- return entries, try_get(c, lambda x: x["continuation"])
-
- def _entries(self, page, playlist_id, max_pages=None):
- seen = []
-
- yt_conf = {}
- for m in re.finditer(self._YTCFG_DATA_RE, page):
- parsed = self._parse_json(m.group(1), playlist_id,
- transform_source=js_to_json, fatal=False)
- if parsed:
- yt_conf.update(parsed)
-
- data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)
-
- for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
- entries, continuation = self._find_entries_in_json(data_json)
- processed = self._process_entries(entries, seen)
-
- if not processed:
- break
- for entry in processed:
- yield entry
-
- if not continuation or not yt_conf:
- break
- continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
- continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
- if not continuation_token or not continuation_url:
- break
-
- count = 0
- retries = 3
- while count <= retries:
- try:
- # Downloading page may result in intermittent 5xx HTTP error
- # that is usually worked around with a retry
- data_json = self._download_json(
- 'https://www.youtube.com%s' % continuation_url,
- playlist_id,
- 'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
-
- transform_source=uppercase_escape,
- query={
- 'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
- },
- data= str(json.dumps({
- 'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
- 'continuation': continuation_token
- })).encode(encoding='UTF-8',errors='strict'),
- headers={
- 'Content-Type': 'application/json'
- }
- )
- break
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
- count += 1
- if count <= retries:
- continue
- raise
-
- def _extract_title(self, renderer):
- title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
- if title:
- return title
- return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
-
-
-class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
- def _is_entry(self, obj):
- return 'videoId' in obj
-
- def _process_entries(self, entries, seen):
- ids_in_page = []
- titles_in_page = []
- for renderer in entries:
- video_id = try_get(renderer, lambda x: x['videoId'])
- video_title = self._extract_title(renderer)
-
- if video_id is None or video_title is None:
- # we do not have a videoRenderer or title extraction broke
- continue
-
- video_title = video_title.strip()
-
- try:
- idx = ids_in_page.index(video_id)
- if video_title and not titles_in_page[idx]:
- titles_in_page[idx] = video_title
- except ValueError:
- ids_in_page.append(video_id)
- titles_in_page.append(video_title)
-
- for video_id, video_title in zip(ids_in_page, titles_in_page):
- yield self.url_result(video_id, 'Youtube', video_id, video_title)
-
-
-class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
- def _is_entry(self, obj):
- return 'playlistId' in obj
+ def _call_api(self, ep, query, video_id):
+ data = self._DEFAULT_API_DATA.copy()
+ data.update(query)
- def _process_entries(self, entries, seen):
- for playlist_id in orderedSet(try_get(r, lambda x: x['playlistId']) for r in entries):
+ response = self._download_json(
+ 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
+ note='Downloading API JSON', errnote='Unable to download API page',
+ data=json.dumps(data).encode('utf8'),
+ headers={'content-type': 'application/json'},
+ query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
- yield self.url_result(
- 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
+ return response
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- title = self._og_search_title(webpage, fatal=False)
- return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
+ def _extract_yt_initial_data(self, video_id, webpage):
+ return self._parse_json(
+ self._search_regex(
+ r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
+ webpage, 'yt initial data'),
+ video_id)
class YoutubeIE(YoutubeBaseInfoExtractor):
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
)? # all until now is optional -> you can pass the naked ID
- ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
+ (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?!.*?\blist=
(?:
%(playlist_id)s| # combined list/video URLs are handled by the playlist IE
}
},
{
- 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
'note': 'Use the first video ID in the URL',
'info_dict': {
'id': 'BaW_jenozKc',
},
'skip': 'format 141 not served anymore',
},
+ # DASH manifest with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+ 'info_dict': {
+ 'id': 'IB3lcPjvWLA',
+ 'ext': 'm4a',
+ 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
+ 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
+ 'duration': 244,
+ 'uploader': 'AfrojackVEVO',
+ 'uploader_id': 'AfrojackVEVO',
+ 'upload_date': '20131011',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
# Controversy video
{
'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
'age_limit': 18,
},
},
+ # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
+ {
+ 'url': '__2ABJjxzNo',
+ 'info_dict': {
+ 'id': '__2ABJjxzNo',
+ 'ext': 'mp4',
+ 'duration': 266,
+ 'upload_date': '20100430',
+ 'uploader_id': 'deadmau5',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+ 'creator': 'Dada Life, deadmau5',
+ 'description': 'md5:12c56784b8032162bb936a5f76d55360',
+ 'uploader': 'deadmau5',
+ 'title': 'Deadmau5 - Some Chords (HD)',
+ 'alt_title': 'This Machine Kills Some Chords',
+ },
+ 'expected_warnings': [
+ 'DASH manifest missing',
+ ]
+ },
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
{
'url': 'lqQg6PlCWgI',
'url': 'sJL6WA-aGkQ',
'only_matching': True,
},
- {
- 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
- 'only_matching': True,
- },
{
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
'only_matching': True,
'skip_download': True,
},
},
- {
- # Youtube Music Auto-generated description
- # Retrieve 'artist' field from 'Artist:' in video description
- # when it is present on youtube music video
- 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
- 'info_dict': {
- 'id': 'k0jLE7tTwjY',
- 'ext': 'mp4',
- 'title': 'Latch Feat. Sam Smith',
- 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
- 'upload_date': '20150110',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
- 'artist': 'Disclosure',
- 'track': 'Latch Feat. Sam Smith',
- 'album': 'Latch Featuring Sam Smith',
- 'release_date': '20121008',
- 'release_year': 2012,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle multiple artists on youtube music video
- 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
- 'info_dict': {
- 'id': '74qn0eJSjpA',
- 'ext': 'mp4',
- 'title': 'Eastside',
- 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
- 'upload_date': '20180710',
- 'uploader': 'Benny Blanco - Topic',
- 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
- 'artist': 'benny blanco, Halsey, Khalid',
- 'track': 'Eastside',
- 'album': 'Eastside',
- 'release_date': '20180713',
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle youtube music video with release_year and no release_date
- 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
- 'info_dict': {
- 'id': '-hcAI0g-f5M',
- 'ext': 'mp4',
- 'title': 'Put It On Me',
- 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
- 'upload_date': '20180426',
- 'uploader': 'Matt Maeson - Topic',
- 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
- 'artist': 'Matt Maeson',
- 'track': 'Put It On Me',
- 'album': 'The Hearse',
- 'release_date': None,
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
{
'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
'only_matching': True,
# https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});',
- r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
+ r'ytInitialPlayerResponse\s*=\s*({.+?});var meta' # Needed???
)
config = self._search_regex(
patterns, webpage, 'ytplayer.config', default=None)
self._downloader.report_warning(err_msg)
return {}
try:
- if "args" in player_config and "ttsurl" in player_config["args"]:
- args = player_config['args']
- caption_url = args['ttsurl']
+ args = player_config['args']
+ caption_url = args.get('ttsurl')
+ if caption_url:
timestamp = args['timestamp']
-
# We get the available subtitles
list_params = compat_urllib_parse_urlencode({
'type': 'list',
return captions
# New captions format as of 22.06.2017
- if "args" in player_config:
- player_response = player_config["args"].get('player_response')
- else:
- # New player system (ytInitialPlayerResponse) as of October 2020
- player_response = player_config
-
- if player_response:
- if isinstance(player_response, compat_str):
- player_response = self._parse_json(
- player_response, video_id, fatal=False)
-
- renderer = player_response['captions']['playerCaptionsTracklistRenderer']
- caption_tracks = renderer['captionTracks']
- for caption_track in caption_tracks:
- if 'kind' not in caption_track:
- # not an automatic transcription
- continue
- base_url = caption_track['baseUrl']
+ player_response = args.get('player_response')
+ if player_response and isinstance(player_response, compat_str):
+ player_response = self._parse_json(
+ player_response, video_id, fatal=False)
+ if player_response:
+ renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+ base_url = renderer['captionTracks'][0]['baseUrl']
sub_lang_list = []
for lang in renderer['translationLanguages']:
lang_code = lang.get('languageCode')
sub_lang_list.append(lang_code)
return make_captions(base_url, sub_lang_list)
- self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
- return {}
-
- if "args" in player_config:
- args = player_config["args"]
-
- # Some videos don't provide ttsurl but rather caption_tracks and
- # caption_translation_languages (e.g. 20LmZk1hakA)
- # Does not used anymore as of 22.06.2017
- caption_tracks = args['caption_tracks']
- caption_translation_languages = args['caption_translation_languages']
- caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
- sub_lang_list = []
- for lang in caption_translation_languages.split(','):
- lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
- sub_lang = lang_qs.get('lc', [None])[0]
- if sub_lang:
- sub_lang_list.append(sub_lang)
- return make_captions(caption_url, sub_lang_list)
+ # Some videos don't provide ttsurl but rather caption_tracks and
+ # caption_translation_languages (e.g. 20LmZk1hakA)
+ # Does not used anymore as of 22.06.2017
+ caption_tracks = args['caption_tracks']
+ caption_translation_languages = args['caption_translation_languages']
+ caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
+ sub_lang_list = []
+ for lang in caption_translation_languages.split(','):
+ lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
+ sub_lang = lang_qs.get('lc', [None])[0]
+ if sub_lang:
+ sub_lang_list.append(sub_lang)
+ return make_captions(caption_url, sub_lang_list)
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
except (KeyError, IndexError, ExtractorError):
def _extract_chapters_from_json(self, webpage, video_id, duration):
if not webpage:
return
- initial_data = self._parse_json(
- self._search_regex(
- r'window\["ytInitialData"\] = (.+);\n', webpage,
- 'player args', default='{}'),
- video_id, fatal=False)
- if not initial_data or not isinstance(initial_data, dict):
+ data = self._extract_yt_initial_data(video_id, webpage)
+ if not data or not isinstance(data, dict):
return
chapters_list = try_get(
- initial_data,
+ data,
lambda x: x['playerOverlays']
['playerOverlayRenderer']
['decoratedPlayerBarRenderer']
age_gate = False
# Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
- args = ytplayer_config.get("args")
- if args is not None:
+ if ytplayer_config:
+ args = ytplayer_config.get('args', {})
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
is_live = True
if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id)
- elif not player_response:
- player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response)
+ if not video_info and not player_response:
+ player_response = extract_player_response(
+ self._search_regex(
+ r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
+ 'initial player response', default='{}'),
+ video_id)
+
def extract_unavailable_message():
messages = []
for tag, kind in (('h1', 'message'), ('div', 'submessage')):
if cipher:
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
+ ASSETS_RE = (
+ r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
+ r'"jsUrl"\s*:\s*("[^"]+")',
+ r'"assets":.+?"js":\s*("[^"]+")')
jsplayer_url_json = self._search_regex(
ASSETS_RE,
embed_webpage if age_gate else video_webpage,
def _extract_count(count_name):
return str_to_int(self._search_regex(
- r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
+ r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
% re.escape(count_name),
video_webpage, count_name, default=None))
}
-class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
- IE_DESC = 'YouTube.com playlists'
- _VALID_URL = r"""(?x)(?:
- (?:https?://)?
- (?:\w+\.)?
- (?:
- (?:
- youtube(?:kids)?\.com|
- invidio\.us
- )
- /
- (?:
- (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
- \? (?:.*?[&;])*? (?:p|a|list)=
- | p/
- )|
- youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
- )
- (
- (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
- # Top tracks, they can also include dots
- |(?:MC)[\w\.]*
- )
- .*
- |
- (%(playlist_id)s)
- )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
- _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
- _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
- IE_NAME = 'youtube:playlist'
- _YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
- _YTM_CHANNEL_INFO = {
- 'uploader': 'Youtube Music',
- 'uploader_id': 'music', # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
- 'uploader_url': 'https://www.youtube.com/music'
- }
+class YoutubeTabIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com tab'
+ # (?x)^ will cause warning in LiveIE. So I cant split this into multiple lines using '''
+ _VALID_URL = (
+ r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/'
+ r'(?:(?!(%s)([/#?]|$))|'
+ r'(?:channel|c|user)/|'
+ r'(?:playlist|watch)\?.*?\blist=)'
+ r'(?P<id>[^/?#&]+)') % YoutubeBaseInfoExtractor._RESERVED_NAMES
+ IE_NAME = 'youtube:tab'
+
_TESTS = [{
+ # playlists, multipage
+ 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
+ 'playlist_mincount': 94,
+ 'info_dict': {
+ 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'title': 'Игорь Клейнер - Playlists',
+ 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ },
+ }, {
+ # playlists, multipage, different order
+ 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
+ 'playlist_mincount': 94,
+ 'info_dict': {
+ 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'title': 'Игорь Клейнер - Playlists',
+ 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ },
+ }, {
+ # playlists, singlepage
+ 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
+ 'title': 'ThirstForScience - Playlists',
+ 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+ 'only_matching': True,
+ }, {
+ # basic, single video playlist
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': {
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
},
'playlist_count': 1,
}, {
+ # empty playlist
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'info_dict': {
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
},
'playlist_count': 0,
}, {
- 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
- 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ # Home tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
'info_dict': {
- 'title': '29C3: Not my department',
- 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
- 'uploader': 'Christiaan008',
- 'uploader_id': 'ChRiStIaAn008',
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Home',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
},
- 'playlist_count': 96,
+ 'playlist_mincount': 2,
}, {
- 'note': 'issue #673',
- 'url': 'PLBB231211A4F62143',
+ # Videos tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
'info_dict': {
- 'title': '[OLD]Team Fortress 2 (Class-based LP)',
- 'id': 'PLBB231211A4F62143',
- 'uploader': 'Wickydoo',
- 'uploader_id': 'Wickydoo',
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Videos',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
},
- 'playlist_mincount': 26,
+ 'playlist_mincount': 975,
}, {
- 'note': 'Large playlist',
- 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+ # Videos tab, sorted by popular
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
'info_dict': {
- 'title': 'Uploads from Cauchemar',
- 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
- 'uploader': 'Cauchemar',
- 'uploader_id': 'Cauchemar89',
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Videos',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
},
- 'playlist_mincount': 799,
+ 'playlist_mincount': 199,
}, {
- 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ # Playlists tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
'info_dict': {
- 'title': 'YDL_safe_search',
- 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Playlists',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
},
- 'playlist_count': 2,
- 'skip': 'This playlist is private',
+ 'playlist_mincount': 17,
}, {
- 'note': 'embedded',
- 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
- 'playlist_count': 4,
+ # Community tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
'info_dict': {
- 'title': 'JODA15',
- 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
- 'uploader': 'milan',
- 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
- }
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Community',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 18,
}, {
- 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'playlist_mincount': 485,
+ # Channels tab
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
'info_dict': {
- 'title': '2018 Chinese New Singles (11/6 updated)',
- 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'uploader': 'LBK',
- 'uploader_id': 'sdragonfang',
- }
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'lex will - Channels',
+ 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ },
+ 'playlist_mincount': 138,
}, {
- 'note': 'Embedded SWF player',
- 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
- 'playlist_count': 4,
+ 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ',
+ 'only_matching': True,
+ }, {
+ 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
+ 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': {
- 'title': 'JODA7',
- 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
+ 'title': '29C3: Not my department',
+ 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'uploader': 'Christiaan008',
+ 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
},
- 'skip': 'This playlist does not exist',
+ 'playlist_count': 96,
+ }, {
+ 'note': 'Large playlist',
+ 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'info_dict': {
+ 'title': 'Uploads from Cauchemar',
+ 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'uploader': 'Cauchemar',
+ 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+ },
+ 'playlist_mincount': 1123,
+ }, {
+ # even larger playlist, 8832 videos
+ 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
+ 'only_matching': True,
}, {
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
'title': 'Uploads from Interstellar Movie',
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
'uploader': 'Interstellar Movie',
- 'uploader_id': 'InterstellarMovie1',
+ 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
},
'playlist_mincount': 21,
+ }, {
+ # https://github.com/ytdl-org/youtube-dl/issues/21844
+ 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'info_dict': {
+ 'title': 'Data Analysis with Dr Mike Pound',
+ 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
+ 'uploader': 'Computerphile',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
+ 'only_matching': True,
}, {
# Playlist URL that does not actually serve a playlist
'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
'skip': 'This video is not available.',
'add_ie': [YoutubeIE.ie_key()],
}, {
- 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
- 'info_dict': {
- 'id': 'yeWKywCrFtk',
- 'ext': 'mp4',
- 'title': 'Small Scale Baler and Braiding Rugs',
- 'uploader': 'Backus-Page House Museum',
- 'uploader_id': 'backuspagemuseum',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
- 'upload_date': '20161008',
- 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
- 'categories': ['Nonprofits & Activism'],
- 'tags': list,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'noplaylist': True,
- 'skip_download': True,
- },
- }, {
- # https://github.com/ytdl-org/youtube-dl/issues/21844
- 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
- 'info_dict': {
- 'title': 'Data Analysis with Dr Mike Pound',
- 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
- 'uploader_id': 'Computerphile',
- 'uploader': 'Computerphile',
- },
- 'playlist_mincount': 11,
- }, {
- 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
- 'only_matching': True,
- }, {
- 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
- 'only_matching': True,
- }, {
- # music album playlist
- 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
- 'only_matching': True,
- }, {
- 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
+ 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
'only_matching': True,
}, {
- 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
+ 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
'only_matching': True,
}]
- def _real_initialize(self):
- self._login()
+ @classmethod
+ def suitable(cls, url):
+ IGNORE = (YoutubeLiveIE,)
+ return (
+ False if any(ie.suitable(url) for ie in IGNORE)
+ else super(YoutubeTabIE, cls).suitable(url))
+
+ def _extract_channel_id(self, webpage):
+ channel_id = self._html_search_meta(
+ 'channelId', webpage, 'channel id', default=None)
+ if channel_id:
+ return channel_id
+ channel_url = self._html_search_meta(
+ ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
+ 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
+ 'twitter:app:url:googleplay'), webpage, 'channel url')
+ return self._search_regex(
+ r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
+ channel_url, 'channel id')
- def extract_videos_from_page(self, page):
- ids_in_page = []
- titles_in_page = []
-
- for item in re.findall(
- r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
- attrs = extract_attributes(item)
- video_id = attrs['data-video-id']
- video_title = unescapeHTML(attrs.get('data-title'))
- if video_title:
- video_title = video_title.strip()
- ids_in_page.append(video_id)
- titles_in_page.append(video_title)
-
- # Fallback with old _VIDEO_RE
- self.extract_videos_from_page_impl(
- self._VIDEO_RE, page, ids_in_page, titles_in_page)
-
- # Relaxed fallbacks
- self.extract_videos_from_page_impl(
- r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
- ids_in_page, titles_in_page)
- self.extract_videos_from_page_impl(
- r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
- ids_in_page, titles_in_page)
-
- return zip(ids_in_page, titles_in_page)
-
- def _extract_mix_ids_from_yt_initial(self, yt_initial):
- ids = []
- playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
- if playlist_contents:
- for item in playlist_contents:
- videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
- if videoId:
- ids.append(videoId)
- return ids
-
- def _extract_mix(self, playlist_id):
- # The mixes are generated from a single video
- # the id of the playlist is just 'RD' + video_id
- ids = []
- yt_initial = None
- last_id = playlist_id[-11:]
- for n in itertools.count(1):
- url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
- webpage = self._download_webpage(
- url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
- new_ids = orderedSet(re.findall(
- r'''(?xs)data-video-username=".*?".*?
- href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
- webpage))
-
- # if no ids in html of page, try using embedded json
- if (len(new_ids) == 0):
- yt_initial = self._get_yt_initial_data(playlist_id, webpage)
- if yt_initial:
- new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
-
- # Fetch new pages until all the videos are repeated, it seems that
- # there are always 51 unique videos.
- new_ids = [_id for _id in new_ids if _id not in ids]
- if not new_ids:
- break
- ids.extend(new_ids)
- last_id = ids[-1]
+ @staticmethod
+ def _extract_grid_item_renderer(item):
+ for item_kind in ('Playlist', 'Video', 'Channel'):
+ renderer = item.get('grid%sRenderer' % item_kind)
+ if renderer:
+ return renderer
+
+ def _extract_video(self, renderer):
+ video_id = renderer.get('videoId')
+ title = try_get(
+ renderer,
+ (lambda x: x['title']['runs'][0]['text'],
+ lambda x: x['title']['simpleText']), compat_str)
+ description = try_get(
+ renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
+ compat_str)
+ duration = parse_duration(try_get(
+ renderer, lambda x: x['lengthText']['simpleText'], compat_str))
+ view_count_text = try_get(
+ renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+ view_count = str_to_int(self._search_regex(
+ r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
+ 'view count', default=None))
+ uploader = try_get(
+ renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': video_id,
+ 'url': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'uploader': uploader,
+ }
- url_results = self._ids_to_results(ids)
+ def _grid_entries(self, grid_renderer):
+ for item in grid_renderer['items']:
+ if not isinstance(item, dict):
+ continue
+ renderer = self._extract_grid_item_renderer(item)
+ if not isinstance(renderer, dict):
+ continue
+ title = try_get(
+ renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+ # playlist
+ playlist_id = renderer.get('playlistId')
+ if playlist_id:
+ yield self.url_result(
+ 'https://www.youtube.com/playlist?list=%s' % playlist_id,
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+ video_title=title)
+ # video
+ video_id = renderer.get('videoId')
+ if video_id:
+ yield self._extract_video(renderer)
+ # channel
+ channel_id = renderer.get('channelId')
+ if channel_id:
+ title = try_get(
+ renderer, lambda x: x['title']['simpleText'], compat_str)
+ yield self.url_result(
+ 'https://www.youtube.com/channel/%s' % channel_id,
+ ie=YoutubeTabIE.ie_key(), video_title=title)
+
+ def _shelf_entries_trimmed(self, shelf_renderer):
+ renderer = try_get(
+ shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
+ if not renderer:
+ return
+ # TODO: add support for nested playlists so each shelf is processed
+ # as separate playlist
+ # TODO: this includes only first N items
+ for entry in self._grid_entries(renderer):
+ yield entry
+
+ def _shelf_entries(self, shelf_renderer):
+ ep = try_get(
+ shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
+ compat_str)
+ shelf_url = urljoin('https://www.youtube.com', ep)
+ if not shelf_url:
+ return
+ title = try_get(
+ shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+ yield self.url_result(shelf_url, video_title=title)
- search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
- title_span = (
- search_title('playlist-title')
- or search_title('title long-title')
- or search_title('title'))
- title = clean_html(title_span)
+ def _playlist_entries(self, video_list_renderer):
+ for content in video_list_renderer['contents']:
+ if not isinstance(content, dict):
+ continue
+ renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
+ if not isinstance(renderer, dict):
+ continue
+ video_id = renderer.get('videoId')
+ if not video_id:
+ continue
+ yield self._extract_video(renderer)
- if not title:
- title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)
+ def _itemSection_entries(self, item_sect_renderer):
+ for content in item_sect_renderer['contents']:
+ if not isinstance(content, dict):
+ continue
+ renderer = content.get('videoRenderer', {})
+ if not isinstance(renderer, dict):
+ continue
+ video_id = renderer.get('videoId')
+ if not video_id:
+ continue
+ yield self._extract_video(renderer)
- return self.playlist_result(url_results, playlist_id, title)
+ def _rich_entries(self, rich_grid_renderer):
+ renderer = try_get(
+ rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
+ video_id = renderer.get('videoId')
+ if not video_id:
+ return
+ yield self._extract_video(renderer)
- def _extract_playlist(self, playlist_id):
- url = self._TEMPLATE_URL % playlist_id
- page = self._download_webpage(url, playlist_id)
+ def _video_entry(self, video_renderer):
+ video_id = video_renderer.get('videoId')
+ if video_id:
+ return self._extract_video(video_renderer)
- # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
- for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
- match = match.strip()
- # Check if the playlist exists or is private
- mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
- if mobj:
- reason = mobj.group('reason')
- message = 'This playlist %s' % reason
- if 'private' in reason:
- message += ', use --username or --netrc to access it'
- message += '.'
- raise ExtractorError(message, expected=True)
- elif re.match(r'[^<]*Invalid parameters[^<]*', match):
- raise ExtractorError(
- 'Invalid parameters. Maybe URL is incorrect.',
- expected=True)
- elif re.match(r'[^<]*Choose your language[^<]*', match):
+ def _post_thread_entries(self, post_thread_renderer):
+ post_renderer = try_get(
+ post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
+ if not post_renderer:
+ return
+ # video attachment
+ video_renderer = try_get(
+ post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
+ video_id = None
+ if video_renderer:
+ entry = self._video_entry(video_renderer)
+ if entry:
+ yield entry
+ # inline video links
+ runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
+ for run in runs:
+ if not isinstance(run, dict):
continue
- else:
- self.report_warning('Youtube gives an alert message: ' + match)
-
- playlist_title = self._html_search_regex(
- r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
- page, 'title', default=None)
+ ep_url = try_get(
+ run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
+ if not ep_url:
+ continue
+ if not YoutubeIE.suitable(ep_url):
+ continue
+ ep_video_id = YoutubeIE._match_id(ep_url)
+ if video_id == ep_video_id:
+ continue
+ yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
- _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
- uploader = self._html_search_regex(
- r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
- page, 'uploader', default=None)
- mobj = re.search(
- r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
- page)
- if mobj:
- uploader_id = mobj.group('uploader_id')
- uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
- else:
- uploader_id = uploader_url = None
+ def _post_thread_continuation_entries(self, post_thread_continuation):
+ contents = post_thread_continuation.get('contents')
+ if not isinstance(contents, list):
+ return
+ for content in contents:
+ renderer = content.get('backstagePostThreadRenderer')
+ if not isinstance(renderer, dict):
+ continue
+ for entry in self._post_thread_entries(renderer):
+ yield entry
- has_videos = True
+ @staticmethod
+ def _extract_next_continuation_data(renderer):
+ next_continuation = try_get(
+ renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
+ if not next_continuation:
+ return
+ continuation = next_continuation.get('continuation')
+ if not continuation:
+ return
+ ctp = next_continuation.get('clickTrackingParams')
+ return {
+ 'ctoken': continuation,
+ 'continuation': continuation,
+ 'itct': ctp,
+ }
- if not playlist_title:
- try:
- # Some playlist URLs don't actually serve a playlist (e.g.
- # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
- next(self._entries(page, playlist_id))
- except StopIteration:
- has_videos = False
+ @classmethod
+ def _extract_continuation(cls, renderer):
+ next_continuation = cls._extract_next_continuation_data(renderer)
+ if next_continuation:
+ return next_continuation
+ contents = renderer.get('contents')
+ if not isinstance(contents, list):
+ return
+ for content in contents:
+ if not isinstance(content, dict):
+ continue
+ continuation_ep = try_get(
+ content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
+ dict)
+ if not continuation_ep:
+ continue
+ continuation = try_get(
+ continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ if not continuation:
+ continue
+ ctp = continuation_ep.get('clickTrackingParams')
+ if not ctp:
+ continue
+ return {
+ 'ctoken': continuation,
+ 'continuation': continuation,
+ 'itct': ctp,
+ }
- playlist = self.playlist_result(
- self._entries(page, playlist_id), playlist_id, playlist_title)
- playlist.update({
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'uploader_url': uploader_url,
- })
- if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
- playlist.update(self._YTM_CHANNEL_INFO)
+ def _entries(self, tab, identity_token):
- return has_videos, playlist
+ def extract_entries(parent_renderer):
+ slr_contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
+ for slr_content in slr_contents:
+ if not isinstance(slr_content, dict):
+ continue
+ is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
+ if not is_renderer:
+ renderer = slr_content.get('richItemRenderer')
+ if renderer:
+ for entry in self._rich_entries(renderer):
+ yield entry
+ continuation_list[0] = self._extract_continuation(parent_renderer)
+ continue
+ isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
+ for isr_content in isr_contents:
+ if not isinstance(isr_content, dict):
+ continue
+ renderer = isr_content.get('playlistVideoListRenderer')
+ if renderer:
+ for entry in self._playlist_entries(renderer):
+ yield entry
+ continuation_list[0] = self._extract_continuation(renderer)
+ continue
+ renderer = isr_content.get('gridRenderer')
+ if renderer:
+ for entry in self._grid_entries(renderer):
+ yield entry
+ continuation_list[0] = self._extract_continuation(renderer)
+ continue
+ renderer = isr_content.get('shelfRenderer')
+ if renderer:
+ for entry in self._shelf_entries(renderer):
+ yield entry
+ continuation_list[0] = self._extract_continuation(parent_renderer)
+ continue
+ renderer = isr_content.get('backstagePostThreadRenderer')
+ if renderer:
+ for entry in self._post_thread_entries(renderer):
+ yield entry
+ continuation_list[0] = self._extract_continuation(renderer)
+ continue
+ renderer = isr_content.get('videoRenderer')
+ if renderer:
+ entry = self._video_entry(renderer)
+ if entry:
+ yield entry
+ if not continuation_list[0]:
+ continuation_list[0] = self._extract_continuation(is_renderer)
+ if not continuation_list[0]:
+ continuation_list[0] = self._extract_continuation(parent_renderer)
+
+ continuation_list = [None] # Python 2 doesnot support nonlocal
+ parent_renderer = (
+ try_get(tab, lambda x: x['sectionListRenderer'], dict)
+ or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
+ if parent_renderer:
+ for entry in extract_entries(parent_renderer):
+ yield entry
- def _check_download_just_video(self, url, playlist_id):
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- video_id = query_dict.get('v', [None])[0] or self._search_regex(
- r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
- 'video id', default=None)
- if video_id:
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- return video_id, None
- return None, None
+ continuation = continuation_list[0]
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- video_id, video = self._check_download_just_video(url, playlist_id)
- if video:
- return video
-
- if playlist_id.startswith(('RD', 'UL', 'PU')):
- if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
- # Mixes require a custom extraction process,
- # Youtube Music playlists act like normal playlists (with randomized order)
- return self._extract_mix(playlist_id)
-
- has_videos, playlist = self._extract_playlist(playlist_id)
- if has_videos or not video_id:
- return playlist
-
- # Some playlist URLs don't actually serve a playlist (see
- # https://github.com/ytdl-org/youtube-dl/issues/10537).
- # Fallback to plain video extraction if there is a video id
- # along with playlist id.
- return self.url_result(video_id, 'Youtube', video_id=video_id)
-
-
-class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
- IE_DESC = 'YouTube.com channels'
- _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
- _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
- _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
- IE_NAME = 'youtube:channel'
- _TESTS = [{
- 'note': 'paginated channel',
- 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
- 'playlist_mincount': 91,
- 'info_dict': {
- 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
- 'title': 'Uploads from lex will',
- 'uploader': 'lex will',
- 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ headers = {
+ 'x-youtube-client-name': '1',
+ 'x-youtube-client-version': '2.20201112.04.01',
}
- }, {
- 'note': 'Age restricted channel',
- # from https://www.youtube.com/user/DeusExOfficial
- 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
- 'playlist_mincount': 64,
- 'info_dict': {
- 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
- 'title': 'Uploads from Deus Ex',
- 'uploader': 'Deus Ex',
- 'uploader_id': 'DeusExOfficial',
- },
- }, {
- 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
- 'only_matching': True,
- }]
+ if identity_token:
+ headers['x-youtube-identity-token'] = identity_token
- @classmethod
- def suitable(cls, url):
- return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
- else super(YoutubeChannelIE, cls).suitable(url))
+ for page_num in itertools.count(1):
+ if not continuation:
+ break
+ if hasattr(self, '_MAX_PAGES') and page_num > self._MAX_PAGES:
+ break
+ browse = self._download_json(
+ 'https://www.youtube.com/browse_ajax', None,
+ 'Downloading page %d' % page_num,
+ headers=headers, query=continuation, fatal=False)
+ if not browse:
+ break
+ response = try_get(browse, lambda x: x[1]['response'], dict)
+ if not response:
+ break
- def _build_template_url(self, url, channel_id):
- return self._TEMPLATE_URL % channel_id
+ continuation_contents = try_get(
+ response, lambda x: x['continuationContents'], dict)
+ if continuation_contents:
+ continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
+ if continuation_renderer:
+ for entry in self._playlist_entries(continuation_renderer):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ continue
+ continuation_renderer = continuation_contents.get('gridContinuation')
+ if continuation_renderer:
+ for entry in self._grid_entries(continuation_renderer):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ continue
+ continuation_renderer = continuation_contents.get('itemSectionContinuation')
+ if continuation_renderer:
+ for entry in self._post_thread_continuation_entries(continuation_renderer):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ continue
+ continuation_renderer = continuation_contents.get('sectionListContinuation')
+ if continuation_renderer:
+ continuation_list = [None]
+ for entry in extract_entries(continuation_renderer):
+ yield entry
+ continuation = continuation_list[0]
+ continue
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- url = self._build_template_url(url, channel_id)
-
- # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
- # Workaround by extracting as a playlist if managed to obtain channel playlist URL
- # otherwise fallback on channel by page extraction
- channel_page = self._download_webpage(
- url + '?view=57', channel_id,
- 'Downloading channel page', fatal=False)
- if channel_page is False:
- channel_playlist_id = False
+ continuation_items = try_get(
+ response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
+ if continuation_items:
+ continuation_item = continuation_items[0]
+ if not isinstance(continuation_item, dict):
+ continue
+ renderer = continuation_item.get('playlistVideoRenderer')
+ if renderer:
+ video_list_renderer = {'contents': continuation_items}
+ for entry in self._playlist_entries(video_list_renderer):
+ yield entry
+ continuation = self._extract_continuation(video_list_renderer)
+ continue
+ renderer = continuation_item.get('itemSectionRenderer')
+ if renderer:
+ for entry in self._itemSection_entries(renderer):
+ yield entry
+ continuation = self._extract_continuation({'contents': continuation_items})
+ continue
+ break
+
+ @staticmethod
+ def _extract_selected_tab(tabs):
+ for tab in tabs:
+ if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
+ return tab['tabRenderer']
else:
- channel_playlist_id = self._html_search_meta(
- 'channelId', channel_page, 'channel id', default=None)
- if not channel_playlist_id:
- channel_url = self._html_search_meta(
- ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
- channel_page, 'channel url', default=None)
- if channel_url:
- channel_playlist_id = self._search_regex(
- r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
- channel_url, 'channel id', default=None)
- if channel_playlist_id and channel_playlist_id.startswith('UC'):
- playlist_id = 'UU' + channel_playlist_id[2:]
- return self.url_result(
- compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
-
- channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
- autogenerated = re.search(r'''(?x)
- class="[^"]*?(?:
- channel-header-autogenerated-label|
- yt-channel-title-autogenerated
- )[^"]*"''', channel_page) is not None
-
- if autogenerated:
- # The videos are contained in a single page
- # the ajax pages can't be used, they are empty
- entries = [
- self.url_result(
- video_id, 'Youtube', video_id=video_id,
- video_title=video_title)
- for video_id, video_title in self.extract_videos_from_page(channel_page)]
- return self.playlist_result(entries, channel_id)
+ raise ExtractorError('Unable to find selected tab')
- try:
- next(self._entries(channel_page, channel_id))
- except StopIteration:
- alert_message = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
- channel_page, 'alert', default=None, group='alert')
- if alert_message:
- raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
+ @staticmethod
+ def _extract_uploader(data):
+ uploader = {}
+ sidebar_renderer = try_get(
+ data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
+ if sidebar_renderer:
+ for item in sidebar_renderer:
+ if not isinstance(item, dict):
+ continue
+ renderer = item.get('playlistSidebarSecondaryInfoRenderer')
+ if not isinstance(renderer, dict):
+ continue
+ owner = try_get(
+ renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
+ if owner:
+ uploader['uploader'] = owner.get('text')
+ uploader['uploader_id'] = try_get(
+ owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
+ uploader['uploader_url'] = urljoin(
+ 'https://www.youtube.com/',
+ try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+ return uploader
+
+ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
+ selected_tab = self._extract_selected_tab(tabs)
+ renderer = try_get(
+ data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
+ playlist_id = None
+ if renderer:
+ channel_title = renderer.get('title') or item_id
+ tab_title = selected_tab.get('title')
+ title = channel_title or item_id
+ if tab_title:
+ title += ' - %s' % tab_title
+ description = renderer.get('description')
+ playlist_id = renderer.get('externalId')
+ renderer = try_get(
+ data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
+ if renderer:
+ title = renderer.get('title')
+ description = None
+ playlist_id = item_id
+ if playlist_id is None:
+ return None
+ playlist = self.playlist_result(
+ self._entries(selected_tab['content'], identity_token),
+ playlist_id=playlist_id, playlist_title=title,
+ playlist_description=description)
+ playlist.update(self._extract_uploader(data))
+ return playlist
- return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
+ def _extract_from_playlist(self, item_id, data, playlist):
+ title = playlist.get('title') or try_get(
+ data, lambda x: x['titleText']['simpleText'], compat_str)
+ playlist_id = playlist.get('playlistId') or item_id
+ return self.playlist_result(
+ self._playlist_entries(playlist), playlist_id=playlist_id,
+ playlist_title=title)
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+ url = compat_urlparse.urlunparse(
+ compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ # Handle both video/playlist URLs
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('v', [None])[0]
+ playlist_id = qs.get('list', [None])[0]
+ if video_id and playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+ webpage = self._download_webpage(url, item_id)
+ identity_token = self._search_regex(
+ r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'identity token', default=None)
+ data = self._extract_yt_initial_data(item_id, webpage)
+ tabs = try_get(
+ data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
+ if tabs:
+ return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
+ playlist = try_get(
+ data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
+ if playlist:
+ return self._extract_from_playlist(item_id, data, playlist)
+ # Fallback to video extraction if no playlist alike page is recognized
+ if video_id:
+ return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
+ # Failed to recognize
+ raise ExtractorError('Unable to recognize tab page')
-class YoutubeUserIE(YoutubeChannelIE):
- IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
- _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
- IE_NAME = 'youtube:user'
+class YoutubePlaylistIE(InfoExtractor):
+ IE_DESC = 'YouTube.com playlists'
+ _VALID_URL = r'''(?x)(?:
+ (?:https?://)?
+ (?:\w+\.)?
+ (?:
+ (?:
+ youtube(?:kids)?\.com|
+ invidio\.us|
+ youtu\.be
+ )
+ /.*?\?.*?\blist=
+ )?
+ (?P<id>%(playlist_id)s)
+ )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ IE_NAME = 'youtube:playlist'
_TESTS = [{
- 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
- 'playlist_mincount': 320,
+ 'note': 'issue #673',
+ 'url': 'PLBB231211A4F62143',
'info_dict': {
- 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
- 'title': 'Uploads from The Linux Foundation',
- 'uploader': 'The Linux Foundation',
- 'uploader_id': 'TheLinuxFoundation',
- }
+ 'title': '[OLD]Team Fortress 2 (Class-based LP)',
+ 'id': 'PLBB231211A4F62143',
+ 'uploader': 'Wickydoo',
+ 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
+ },
+ 'playlist_mincount': 29,
}, {
- # Only available via https://www.youtube.com/c/12minuteathlete/videos
- # but not https://www.youtube.com/user/12minuteathlete/videos
- 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
- 'playlist_mincount': 249,
+ 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ 'info_dict': {
+ 'title': 'YDL_safe_search',
+ 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This playlist is private',
+ }, {
+ 'note': 'embedded',
+ 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'playlist_count': 4,
'info_dict': {
- 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
- 'title': 'Uploads from 12 Minute Athlete',
- 'uploader': '12 Minute Athlete',
- 'uploader_id': 'the12minuteathlete',
+ 'title': 'JODA15',
+ 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'uploader': 'milan',
+ 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
}
}, {
- 'url': 'ytuser:phihag',
- 'only_matching': True,
+ 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'playlist_mincount': 982,
+ 'info_dict': {
+ 'title': '2018 Chinese New Singles (11/6 updated)',
+ 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'uploader': 'LBK',
+ 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
+ }
}, {
- 'url': 'https://www.youtube.com/c/gametrailers',
- 'only_matching': True,
+ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+ 'info_dict': {
+ 'id': 'yeWKywCrFtk',
+ 'ext': 'mp4',
+ 'title': 'Small Scale Baler and Braiding Rugs',
+ 'uploader': 'Backus-Page House Museum',
+ 'uploader_id': 'backuspagemuseum',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+ 'upload_date': '20161008',
+ 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+ 'categories': ['Nonprofits & Activism'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
}, {
- 'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
+ 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
'only_matching': True,
}, {
- 'url': 'https://www.youtube.com/gametrailers',
+ 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
'only_matching': True,
}, {
- # This channel is not available, geo restricted to JP
- 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
+ # music album playlist
+ 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
- # Don't return True if the url can be extracted with other youtube
- # extractor, the regex would is too permissive and it would match.
- other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
- if any(ie.suitable(url) for ie in other_yt_ies):
- return False
- else:
- return super(YoutubeUserIE, cls).suitable(url)
+ return False if YoutubeTabIE.suitable(url) else super(
+ YoutubePlaylistIE, cls).suitable(url)
- def _build_template_url(self, url, channel_id):
- mobj = re.match(self._VALID_URL, url)
- return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ if not qs:
+ qs = {'list': playlist_id}
+ return self.url_result(
+ update_url_query('https://www.youtube.com/playlist', qs),
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+
+
+class YoutubeYtUserIE(InfoExtractor):
+ _VALID_URL = r'ytuser:(?P<id>.+)'
+ _TESTS = [{
+ 'url': 'ytuser:phihag',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self.url_result(
+ 'https://www.youtube.com/user/%s' % user_id,
+ ie=YoutubeTabIE.ie_key(), video_id=user_id)
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com live streams'
- _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
+ _VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
IE_NAME = 'youtube:live'
_TESTS = [{
return self.url_result(base_url)
-class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com user/channel playlists'
- _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
- IE_NAME = 'youtube:playlists'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
- 'playlist_mincount': 4,
- 'info_dict': {
- 'id': 'ThirstForScience',
- 'title': 'ThirstForScience',
- },
- }, {
- # with "Load more" button
- 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
- 'playlist_mincount': 70,
- 'info_dict': {
- 'id': 'igorkle1',
- 'title': 'Игорь Клейнер',
- },
- }, {
- 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
- 'playlist_mincount': 17,
- 'info_dict': {
- 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
- 'title': 'Chem Player',
- },
- 'skip': 'Blocked',
- }, {
- 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
- 'only_matching': True,
- }]
-
-
-class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
_SEARCH_PARAMS = 'CAI%3D'
-class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
+class YoutubeSearchURLIE(InfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+ _PARAM_REGEX = r''
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results/?(?:\?|\?[^#]*?&)(?:sp=(?P<param1>[^&#]+)&(?:[^#]*&)?)?(?:q|search_query)=(?P<query>[^#&]+)(?:[^#]*?&sp=(?P<param2>[^#&]+))?'
+ _MAX_RESULTS = 100
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
'only_matching': True,
}]
- def _process_json_dict(self, obj, videos, c):
- if "videoId" in obj:
- videos.append(obj)
- return
-
- if "nextContinuationData" in obj:
- c["continuation"] = obj["nextContinuationData"]
- return
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
query = compat_urllib_parse_unquote_plus(mobj.group('query'))
- webpage = self._download_webpage(url, query)
- return self.playlist_result(self._entries(webpage, query, max_pages=5), playlist_title=query)
-
-
-class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com (multi-season) shows'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
- IE_NAME = 'youtube:show'
- _TESTS = [{
- 'url': 'https://www.youtube.com/show/airdisasters',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'id': 'airdisasters',
- 'title': 'Air Disasters',
- }
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- return super(YoutubeShowIE, self)._real_extract(
- 'https://www.youtube.com/show/%s/playlists' % playlist_id)
+ IE = YoutubeSearchIE(self._downloader)
+ IE._SEARCH_PARAMS = mobj.group('param1') or mobj.group('param2')
+ self._downloader.to_screen(IE._SEARCH_PARAMS)
+ IE._MAX_RESULTS = self._MAX_RESULTS
+ return IE._get_n_results(query, self._MAX_RESULTS)
-class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
+class YoutubeFeedsInfoExtractor(YoutubeTabIE):
"""
Base class for feed extractors
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
"""
_LOGIN_REQUIRED = True
+ _TESTS = []
+
+ # _MAX_PAGES = 5
@property
def IE_NAME(self):
def _real_initialize(self):
self._login()
- def _process_entries(self, entries, seen):
- new_info = []
- for v in entries:
- v_id = try_get(v, lambda x: x['videoId'])
- if not v_id:
- continue
-
- have_video = False
- for old in seen:
- if old['videoId'] == v_id:
- have_video = True
- break
-
- if not have_video:
- new_info.append(v)
-
- if not new_info:
+ def _shelf_entries(self, shelf_renderer):
+ renderer = try_get(shelf_renderer, lambda x: x['content']['gridRenderer'], dict)
+ if not renderer:
return
+ for entry in self._grid_entries(renderer):
+ yield entry
- seen.extend(new_info)
- for video in new_info:
- yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=self._extract_title(video))
+ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
+ selected_tab = self._extract_selected_tab(tabs)
+ return self.playlist_result(
+ self._entries(selected_tab['content'], identity_token),
+ playlist_title=self._PLAYLIST_TITLE)
def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
- self._PLAYLIST_TITLE)
- return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
- playlist_title=self._PLAYLIST_TITLE)
-
-
-class YoutubeWatchLaterIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:watchlater'
- IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
+ item_id = self._FEED_NAME
+ url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
+ webpage = self._download_webpage(url, item_id)
+ identity_token = self._search_regex(
+ r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'identity token', default=None)
+ data = self._extract_yt_initial_data(item_id, webpage)
+ tabs = try_get(
+ data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
+ if tabs:
+ return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
+ # Failed to recognize
+ raise ExtractorError('Unable to recognize feed page')
+
+
+class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater'
+ _FEED_NAME = 'watchlater'
_TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=WL',
+ 'url': 'https://www.youtube.com/feed/watch_later',
'only_matching': True,
}, {
- 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
+ 'url': ':ytwatchlater',
'only_matching': True,
}]
def _real_extract(self, url):
- _, video = self._check_download_just_video(url, 'WL')
- if video:
- return video
- _, playlist = self._extract_playlist('WL')
- return playlist
+ return self.url_result('WL', ie=YoutubePlaylistIE.ie_key())
-class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
- IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
- _LOGIN_REQUIRED = True
+class YoutubeFavouritesIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)'
+ _VALID_URL = r':ytfav(?:ou?rite)s?'
+ _FEED_NAME = 'favourites'
+
+ _TESTS = [{
+ 'url': ':ytfav',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
- playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
- return self.url_result(playlist_id, 'YoutubePlaylist')
+ return self.url_result('LL', ie=YoutubePlaylistIE.ie_key())
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
_FEED_NAME = 'subscriptions'
_PLAYLIST_TITLE = 'Youtube Subscriptions'
raise ExtractorError(
'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
expected=True)
+
+
+# Do Youtube show urls even exist anymore? I couldn't find any
+r'''
+class YoutubeShowIE(YoutubeTabIE):
+ IE_DESC = 'YouTube.com (multi-season) shows'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
+ IE_NAME = 'youtube:show'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/show/airdisasters',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'airdisasters',
+ 'title': 'Air Disasters',
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ return super(YoutubeShowIE, self)._real_extract(
+ 'https://www.youtube.com/show/%s/playlists' % playlist_id)
+'''