import calendar
import collections
import copy
-import datetime
+import datetime as dt
import enum
import hashlib
import itertools
import os.path
import random
import re
+import shlex
import sys
import threading
import time
import traceback
-import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper
from ..compat import functools
from ..jsinterp import JSInterpreter
+from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import (
NO_DEFAULT,
ExtractorError,
clean_html,
datetime_from_str,
dict_get,
+ filesize_from_tbr,
filter_dict,
float_or_none,
format_field,
join_nonempty,
js_to_json,
mimetype2ext,
- network_exceptions,
orderedSet,
parse_codecs,
parse_count,
str_to_int,
strftime_or_none,
traverse_obj,
+ try_call,
try_get,
unescapeHTML,
unified_strdate,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
- 'clientVersion': '17.31.35',
+ 'clientVersion': '19.09.37',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+ 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER',
- 'clientVersion': '17.31.35',
+ 'clientVersion': '19.09.37',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+ 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_MUSIC',
- 'clientVersion': '5.16.51',
+ 'clientVersion': '6.42.52',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
+ 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
- 'clientVersion': '17.33.2',
+ 'clientVersion': '19.09.3',
'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MESSAGES_EXTENSION',
- 'clientVersion': '17.33.2',
+ 'clientVersion': '19.09.3',
'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MUSIC',
- 'clientVersion': '5.21',
+ 'clientVersion': '6.33.3',
'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+ 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
r'(?:www\.)?piped\.adminforge\.de',
r'(?:www\.)?watch\.whatevertinfoil\.de',
r'(?:www\.)?piped\.qdi\.fi',
- r'(?:www\.)?piped\.video',
+ r'(?:(?:www|cf)\.)?piped\.video',
r'(?:www\.)?piped\.aeong\.one',
r'(?:www\.)?piped\.moomoo\.me',
r'(?:www\.)?piped\.chauvet\.pro',
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
return
- consent_id = None
- consent = cookies.get('CONSENT')
- if consent:
- if 'YES' in consent.value:
- return
- consent_id = self._search_regex(
- r'PENDING\+(\d+)', consent.value, 'consent', default=None)
- if not consent_id:
- consent_id = random.randint(100, 999)
- self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+ socs = cookies.get('SOCS')
+ if socs and not socs.value.startswith('CAA'): # not consented
+ return
+ self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
def _initialize_pref(self):
cookies = self._get_cookies('https://www.youtube.com/')
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
"""
- # XXX: this could be moved to a general function in utils.py
+ # XXX: this could be moved to a general function in utils/_utils.py
# The relative time text strings are roughly the same as what
# Javascript's Intl.RelativeTimeFormat function generates.
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
def _parse_time_text(self, text):
if not text:
return
- dt = self.extract_relative_time(text)
+ dt_ = self.extract_relative_time(text)
timestamp = None
- if isinstance(dt, datetime.datetime):
- timestamp = calendar.timegm(dt.timetuple())
+ if isinstance(dt_, dt.datetime):
+ timestamp = calendar.timegm(dt_.timetuple())
if timestamp is None:
timestamp = (
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
- for retry in self.RetryManager():
+ raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+ # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
+ icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+ icd_rm = next(icd_retries)
+ main_retries = iter(self.RetryManager())
+ main_rm = next(main_retries)
+ # Manual retry loop for multiple RetryManagers
+ # The proper RetryManager MUST be advanced after an error
+ # and its result MUST be checked if the manager is non fatal
+ while True:
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
except ExtractorError as e:
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
- elif not isinstance(e.cause, urllib.error.HTTPError):
- retry.error = e
+ elif not isinstance(e.cause, HTTPError):
+ main_rm.error = e
+ next(main_retries)
continue
- first_bytes = e.cause.read(512)
+ first_bytes = e.cause.response.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
- self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
+ self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
- # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+ # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
- if e.cause.code not in (403, 429):
- retry.error = e
+ if e.cause.status not in (403, 429):
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
- # YouTube servers may return errors we want to retry on in a 200 OK response
+ # YouTube's servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
- retry.error = e
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
- retry.error = ExtractorError('Incomplete data received', expected=True)
+ icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+ should_retry = next(icd_retries, None)
+ if not should_retry:
+ return None
continue
return response
'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312',
- 'artist': 'Stephen',
+ 'artists': ['Stephen'],
+ 'creators': ['Stephen'],
'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear',
'release_date': '20190313',
- 'release_year': 2019,
'alt_title': 'Voyeur Girl',
'view_count': int,
'playable_in_embed': True,
'channel': 'Stephen', # TODO: should be "Stephen - Topic"
'uploader': 'Stephen',
'availability': 'public',
- 'creator': 'Stephen',
'duration': 169,
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'age_limit': 0,
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
chapter_time, chapter_title, duration)
for contents in content_list)), [])
- def _extract_heatmap_from_player_overlay(self, data):
- content_list = traverse_obj(data, (
- 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
- 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
- return next(filter(None, (
- traverse_obj(contents, (..., 'heatMarkerRenderer', {
- 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
- 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
- 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
- })) for contents in content_list)), None)
+ def _extract_heatmap(self, data):
+ """Collect heatmap markers from `data`; evaluates to None when the traversal yields nothing truthy."""
+ # Only 'mutations' entries whose markersList has markerType 'MARKER_TYPE_HEATMAP' pass the filter.
+ # NOTE(review): the filter indexes v[...] directly; presumably traverse_obj tolerates entries
+ # lacking those keys - confirm against the helper.
+ return traverse_obj(data, (
+ 'frameworkUpdates', 'entityBatchUpdate', 'mutations',
+ lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
+ 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
+ # Millisecond field; float_or_none(scale=1000) presumably converts it to seconds - confirm in utils
+ 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
+ # Both fields go through int() before being summed, then the sum is divided by 1000
+ 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
+ 'value': ('intensityScoreNormalized', {float_or_none}),
+ })) or None
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
- _PLAYER_PARAMS = 'CgIQBg=='
-
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
yt_query = {
'videoId': video_id,
}
- if _split_innertube_client(client)[0] == 'android':
- yt_query['params'] = self._PLAYER_PARAMS
+ if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
+ yt_query['params'] = 'CgIIAQ=='
+
+ pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp_arg:
+ yt_query['params'] = pp_arg
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
return orderedSet(requested_clients)
+ def _invalid_player_response(self, pr, video_id):
+ # Returns the videoId looked up in `pr` when it differs from `video_id`;
+ # otherwise falls through and returns None implicitly.
+ # NOTE(review): if the lookup itself yields None (presumably when the key is absent),
+ # the returned value is None as well, so callers testing truthiness do not flag that response.
+ # YouTube may return a different video player response than expected.
+ # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
+ if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
+ return pr_id
+
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None
if webpage:
initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
+ prs = []
+ if initial_pr and not self._invalid_player_response(initial_pr, video_id):
+ # Android player_response does not have microFormats which are needed for
+ # extraction of some data. So we return the initial_pr with formats
+ # stripped out even if not requested by the user
+ # See: https://github.com/yt-dlp/yt-dlp/issues/501
+ prs.append({**initial_pr, 'streamingData': None})
+
all_clients = set(clients)
clients = clients[::-1]
- prs = []
def append_client(*client_names):
""" Append the first client name that exists but not already used """
all_clients.add(actual_client)
return
- # Android player_response does not have microFormats which are needed for
- # extraction of some data. So we return the initial_pr with formats
- # stripped out even if not requested by the user
- # See: https://github.com/yt-dlp/yt-dlp/issues/501
- if initial_pr:
- pr = dict(initial_pr)
- pr['streamingData'] = None
- prs.append(pr)
-
- last_error = None
tried_iframe_fallback = False
player_url = None
+ skipped_clients = {}
while clients:
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {}
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e:
- if last_error:
- self.report_warning(last_error)
- last_error = e
+ self.report_warning(e)
continue
- if pr:
- # YouTube may return a different video player response than expected.
- # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
- pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
- if pr_video_id and pr_video_id != video_id:
- self.report_warning(
- f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
- else:
- # Save client name for introspection later
- name = short_client_name(client)
- sd = traverse_obj(pr, ('streamingData', {dict})) or {}
- sd[STREAMING_DATA_CLIENT_NAME] = name
- for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
- f[STREAMING_DATA_CLIENT_NAME] = name
- prs.append(pr)
+ if pr_id := self._invalid_player_response(pr, video_id):
+ skipped_clients[client] = pr_id
+ elif pr:
+ # Save client name for introspection later
+ name = short_client_name(client)
+ sd = traverse_obj(pr, ('streamingData', {dict})) or {}
+ sd[STREAMING_DATA_CLIENT_NAME] = name
+ for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
+ f[STREAMING_DATA_CLIENT_NAME] = name
+ prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
- if last_error:
- if not len(prs):
- raise last_error
- self.report_warning(last_error)
+ if skipped_clients:
+ self.report_warning(
+ f'Skipping player responses from {"/".join(skipped_clients)} clients '
+ f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
+ if not prs:
+ raise ExtractorError(
+ 'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
+ elif not prs:
+ raise ExtractorError('Failed to extract any player response')
return prs, player_url
def _needs_live_processing(self, live_status, duration):
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
+ format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
# Make sure to avoid false positives with small duration differences.
# E.g. __2ABJjxzNo, ySuUZEjARPY
- is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
+ is_damaged = try_call(lambda: format_duration < duration // 2)
if is_damaged:
self.report_warning(
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
'has_drm': bool(fmt.get('drmFamilies')),
'tbr': tbr,
+ 'filesize_approx': filesize_from_tbr(tbr, format_duration),
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
elif itag:
f['format_id'] = itag
+ if f.get('source_preference') is None:
+ f['source_preference'] = -1
+
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
- f['source_preference'] = (f.get('source_preference') or -1) + 100
+ f['source_preference'] += 100
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
if f.get('fps') and f['fps'] <= 1:
del f['fps']
+
+ if proto == 'hls' and f.get('has_drm'):
+ f['has_drm'] = 'maybe'
+ f['source_preference'] -= 5
return True
subtitles = {}
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
+ pp = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp:
+ query['pp'] = pp
webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query)
else None)
streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
*formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+ if all(f.get('has_drm') for f in formats):
+ # If there are no formats that definitely don't have DRM, all have DRM
+ for f in formats:
+ f['has_drm'] = True
return live_broadcast_details, live_status, streaming_data, formats, subtitles
release_year = release_date[:4]
info.update({
'album': mobj.group('album'.strip()),
- 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
+ 'artists': ([a] if (a := mobj.group('clean_artist'))
+ else [a.strip() for a in mobj.group('artist').split('·')]),
'track': mobj.group('track').strip(),
'release_date': release_date,
'release_year': int_or_none(release_year),
or self._extract_chapters_from_description(video_description, duration)
or None)
- info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
+ info['heatmap'] = self._extract_heatmap(initial_data)
contents = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
if mobj:
info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
break
- sbr_tooltip = try_get(
- vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
- if sbr_tooltip:
- like_count, dislike_count = sbr_tooltip.split(' / ')
- info.update({
- 'like_count': str_to_int(like_count),
- 'dislike_count': str_to_int(dislike_count),
- })
+
+ info['like_count'] = traverse_obj(vpir, (
+ 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
+ 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
+ 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
+ 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
+
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
if vcr:
vc = self._get_count(vcr, 'viewCount')
if mrr_title == 'Album':
info['album'] = mrr_contents_text
elif mrr_title == 'Artist':
- info['artist'] = mrr_contents_text
+ info['artists'] = [mrr_contents_text] if mrr_contents_text else None
elif mrr_title == 'Song':
info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
info['upload_date'] = upload_date
- for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
+ if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
+ # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
+ upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
+ if upload_datetime >= datetime_from_str('today-2days'):
+ for fmt in info['formats']:
+ if fmt.get('protocol') == 'm3u8_native':
+ fmt['__needs_testing'] = True
+
+ for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
v = info.get(s_k)
if v:
info[d_k] = v
or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
yield from extract_entries(parent_renderer)
continuation = continuation_list[0]
-
+ seen_continuations = set()
for page_num in itertools.count(1):
if not continuation:
break
+ continuation_token = continuation.get('continuation')
+ if continuation_token is not None and continuation_token in seen_continuations:
+ self.write_debug('Detected YouTube feed looping - assuming end of feed.')
+ break
+ seen_continuations.add(continuation_token)
headers = self.generate_api_headers(
ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
response = self._extract_response(
'availability': self._extract_availability(data),
'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
- 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
+ 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
+ or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
})
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
retry.error = e
continue
self._error_or_warning(e, fatal=fatal)
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
retry.error = ExtractorError('Incomplete yt initial data received')
+ data = None
continue
return webpage, data
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'title': 'Igor Kleiner - Playlists',
- 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
- 'uploader': 'Igor Kleiner',
+ 'title': 'Igor Kleiner Ph.D. - Playlists',
+ 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
+ 'uploader': 'Igor Kleiner Ph.D.',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
- 'channel': 'Igor Kleiner',
+ 'channel': 'Igor Kleiner Ph.D.',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
+ 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int
},
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'title': 'Igor Kleiner - Playlists',
- 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
- 'uploader': 'Igor Kleiner',
+ 'title': 'Igor Kleiner Ph.D. - Playlists',
+ 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
+ 'uploader': 'Igor Kleiner Ph.D.',
'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
- 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
+ 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
- 'channel': 'Igor Kleiner',
+ 'channel': 'Igor Kleiner Ph.D.',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int
},
'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Playlists',
- 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown',
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'uploader_id': '@ThirstForScience',
'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
- 'tags': 'count:13',
+ 'tags': 'count:12',
'channel': 'ThirstForScience',
'channel_follower_count': int
}
'tags': [],
'channel': 'Sergey M.',
'description': '',
- 'modified_date': '20160902',
+ 'modified_date': '20230921',
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
- 'availability': 'public',
+ 'availability': 'unlisted',
'uploader_url': 'https://www.youtube.com/@sergeym.6173',
'uploader_id': '@sergeym.6173',
'uploader': 'Sergey M.',
'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Search - linear algebra',
- 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'tags': ['Mathematics'],
'channel': '3Blue1Brown',
'url': 'https://www.youtube.com/hashtag/cctv9',
'info_dict': {
'id': 'cctv9',
- 'title': '#cctv9',
+ 'title': 'cctv9 - All',
'tags': [],
},
'playlist_mincount': 300, # not consistent but should be over 300
'channel_follower_count': int,
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
- 'description': 'md5:e56b74b5bb7e9c701522162e9abfb822',
+ 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
'channel': 'Polka Ch. 尾丸ポルカ',
'tags': 'count:35',
'uploader_url': 'https://www.youtube.com/@OmaruPolka',
'uploader': 'Polka Ch. 尾丸ポルカ',
'uploader_id': '@OmaruPolka',
+ 'channel_is_verified': True,
},
'playlist_count': 3,
}, {
'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A',
'title': 'Not Just Bikes - Shorts',
- 'tags': 'count:12',
+ 'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
- 'description': 'md5:26bc55af26855a608a5cf89dfa595c8d',
+ 'description': 'md5:5e82545b3a041345927a92d0585df247',
'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes',
'uploader_url': 'https://www.youtube.com/@NotJustBikes',
'uploader': 'Not Just Bikes',
'uploader_id': '@NotJustBikes',
+ 'channel_is_verified': True,
},
'playlist_mincount': 10,
}, {
}, {
'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': {
- 'id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'id': '@3blue1brown',
'tags': ['Mathematics'],
- 'title': '3Blue1Brown - About',
+ 'title': '3Blue1Brown',
'channel_follower_count': int,
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown',
- 'view_count': int,
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
- 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel': '99 Percent Invisible',
'uploader_id': '@99percentinvisiblepodcast',
},
- 'playlist_count': 1,
+ 'playlist_count': 0,
}, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
'url': 'https://www.youtube.com/@AHimitsu/releases',
'uploader_id': '@AHimitsu',
'uploader': 'A Himitsu',
'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
- 'tags': 'count:16',
+ 'tags': 'count:12',
'description': 'I make music',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
'uploader': 'Bangy Shorts',
'tags': [],
'availability': 'public',
- 'modified_date': '20230626',
+ 'modified_date': r're:\d{8}',
'title': 'Uploads from Bangy Shorts',
},
'playlist_mincount': 100,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ }, {
+ 'note': 'Tags containing spaces',
+ 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
+ 'channel': 'Markiplier',
+ 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
+ 'title': 'Markiplier',
+ 'channel_follower_count': int,
+ 'description': 'md5:0c010910558658824402809750dc5d97',
+ 'uploader_id': '@markiplier',
+ 'uploader_url': 'https://www.youtube.com/@markiplier',
+ 'uploader': 'Markiplier',
+ 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
+ 'channel_is_verified': True,
+ 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
+ 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
+ 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
+ 'mark fischbach'],
+ },
}]
@classmethod
def _has_tab(self, tabs, tab_id):
"""Return True if any entry of `tabs` has `tab_id` as the first element of its extracted (id, name) pair."""
return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
+ def _empty_playlist(self, item_id, data):
+ """Build a playlist result with no entries for `item_id`, passing the metadata extracted from `data` as keyword arguments."""
+ return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
+
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
+ # /about is no longer a tab
+ if original_tab_id == 'about':
+ return self._empty_playlist(item_id, data)
+
if not original_tab_id and selected_tab_name:
self.to_screen('Downloading all uploads of the channel. '
'To download only the videos in a specific tab, pass the tab\'s URL')
if not extra_tabs and selected_tab_id != 'videos':
# Channel does not have streams, shorts or videos tabs
if item_id[:2] != 'UC':
- raise ExtractorError('This channel has no uploads', expected=True)
+ return self._empty_playlist(item_id, data)
# Topic channels don't have /videos. Use the equivalent playlist instead
pl_id = f'UU{item_id[2:]}'
try:
data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
except ExtractorError:
- raise ExtractorError('This channel has no uploads', expected=True)
+ return self._empty_playlist(item_id, data)
else:
item_id, url = pl_id, pl_url
self.to_screen(
'uploader_url': 'https://www.youtube.com/@milan5503',
'availability': 'public',
},
- 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
+ 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
}, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 455,
IE_DESC = 'YouTube search'
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
- _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
+ _SEARCH_PARAMS = 'EgIQAfABAQ==' # Videos only
_TESTS = [{
'url': 'ytsearch5:youtube-dl test video',
'playlist_count': 5,
'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
}
+ }, {
+ 'note': 'Suicide/self-harm search warning',
+ 'url': 'ytsearch1:i hate myself and i wanna die',
+ 'playlist_count': 1,
+ 'info_dict': {
+ 'id': 'i hate myself and i wanna die',
+ 'title': 'i hate myself and i wanna die',
+ }
}]
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube search, newest videos first'
- _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
+ _SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date
_TESTS = [{
'url': 'ytsearchdate5:youtube-dl test video',
'playlist_count': 5,