import threading
import time
import traceback
-import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper
from ..compat import functools
from ..jsinterp import JSInterpreter
+from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import (
NO_DEFAULT,
ExtractorError,
join_nonempty,
js_to_json,
mimetype2ext,
- network_exceptions,
orderedSet,
parse_codecs,
parse_count,
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
return
- consent_id = None
- consent = cookies.get('CONSENT')
- if consent:
- if 'YES' in consent.value:
- return
- consent_id = self._search_regex(
- r'PENDING\+(\d+)', consent.value, 'consent', default=None)
- if not consent_id:
- consent_id = random.randint(100, 999)
- self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+ socs = cookies.get('SOCS')
+ if socs and not socs.value.startswith('CAA'): # not consented
+ return
+ self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
def _initialize_pref(self):
cookies = self._get_cookies('https://www.youtube.com/')
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
"""
- # XXX: this could be moved to a general function in utils.py
+ # XXX: this could be moved to a general function in utils/_utils.py
# The relative time text strings are roughly the same as what
# Javascript's Intl.RelativeTimeFormat function generates.
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
- for retry in self.RetryManager():
+ raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+ # Incomplete data should only be a warning by default once its retries are exhausted (fatal if 'raise_incomplete_data' is set), while other errors should be fatal.
+ icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+ icd_rm = next(icd_retries)
+ main_retries = iter(self.RetryManager())
+ main_rm = next(main_retries)
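+ # Manual retry loop, needed because two RetryManagers are driven at the same time.
+ # After an error, the RetryManager that recorded it MUST be advanced with next(),
+ # and for a non-fatal manager the result of next() MUST be checked to see whether another attempt is allowed.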
+ while True:
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
except ExtractorError as e:
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
- elif not isinstance(e.cause, urllib.error.HTTPError):
- retry.error = e
+ elif not isinstance(e.cause, HTTPError):
+ main_rm.error = e
+ next(main_retries)
continue
- first_bytes = e.cause.read(512)
+ first_bytes = e.cause.response.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
- self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
+ self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
- # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+ # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
- if e.cause.code not in (403, 429):
- retry.error = e
+ if e.cause.status not in (403, 429):
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
- # YouTube servers may return errors we want to retry on in a 200 OK response
+ # YouTube's servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
- retry.error = e
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
- retry.error = ExtractorError('Incomplete data received', expected=True)
+ icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+ should_retry = next(icd_retries, None)
+ if not should_retry:
+ return None
continue
return response
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
chapter_time, chapter_title, duration)
for contents in content_list)), [])
- def _extract_heatmap_from_player_overlay(self, data):
- content_list = traverse_obj(data, (
- 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
- 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
- return next(filter(None, (
- traverse_obj(contents, (..., 'heatMarkerRenderer', {
- 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
- 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
- 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
- })) for contents in content_list)), None)
+ def _extract_heatmap(self, data):  # Collect heatmap markers from `data`; returns None when nothing is found
+ return traverse_obj(data, (
+ 'frameworkUpdates', 'entityBatchUpdate', 'mutations',
+ lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',  # keep only the heatmap marker lists
+ 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
+ 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),  # scale=1000: presumably milliseconds -> seconds, matching end_time - confirm
+ 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},  # start + duration, divided by 1000
+ 'value': ('intensityScoreNormalized', {float_or_none}),
+ })) or None  # a falsy (e.g. empty) traversal result is returned as None
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
- _PLAYER_PARAMS = 'CgIQBg=='
-
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
'videoId': video_id,
}
if _split_innertube_client(client)[0] == 'android':
- yt_query['params'] = self._PLAYER_PARAMS
+ yt_query['params'] = 'CgIQBg=='
+
+ pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp_arg:
+ yt_query['params'] = pp_arg
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
elif itag:
f['format_id'] = itag
+ if f.get('source_preference') is None:
+ f['source_preference'] = -1
+
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
- f['source_preference'] = (f.get('source_preference') or -1) + 100
+ f['source_preference'] += 100
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
if f.get('fps') and f['fps'] <= 1:
del f['fps']
+
+ if proto == 'hls' and f.get('has_drm'):
+ f['has_drm'] = 'maybe'
+ f['source_preference'] -= 5
return True
subtitles = {}
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
+ pp = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp:
+ query['pp'] = pp
webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query)
else None)
streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
*formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+ if all(f.get('has_drm') for f in formats):
+ # If every format is flagged as DRM (any truthy value, e.g. 'maybe'), none is known to be DRM-free, so mark them all as definitely DRM
+ for f in formats:
+ f['has_drm'] = True
return live_broadcast_details, live_status, streaming_data, formats, subtitles
or self._extract_chapters_from_description(video_description, duration)
or None)
- info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
+ info['heatmap'] = self._extract_heatmap(initial_data)
contents = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
yield from extract_entries(parent_renderer)
continuation = continuation_list[0]
-
+ seen_continuations = set()
for page_num in itertools.count(1):
if not continuation:
break
+ continuation_token = continuation.get('continuation')
+ if continuation_token is not None and continuation_token in seen_continuations:
+ self.write_debug('Detected YouTube feed looping - assuming end of feed.')
+ break
+ seen_continuations.add(continuation_token)
headers = self.generate_api_headers(
ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
response = self._extract_response(
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
retry.error = e
continue
self._error_or_warning(e, fatal=fatal)