if not self._login():
return
- _YT_WEB_CLIENT_VERSION = '2.20210301.08.00'
- _DEFAULT_API_DATA = {
- 'context': {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': _YT_WEB_CLIENT_VERSION,
- }
- },
- }
-
- _DEFAULT_BASIC_API_HEADERS = {
- 'X-YouTube-Client-Name': '1',
- 'X-YouTube-Client-Version': _YT_WEB_CLIENT_VERSION
- }
-
+ # Web client version used when ytcfg does not supply INNERTUBE_CLIENT_VERSION
+ _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
+ # Innertube API key used when ytcfg does not supply INNERTUBE_API_KEY
+ _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
- note='Downloading API JSON', errnote='Unable to download API page'):
- data = self._DEFAULT_API_DATA.copy()
+ note='Downloading API JSON', errnote='Unable to download API page',
+ context=None, api_key=None):
+ """Call the youtubei/v1 endpoint `ep` and return what _download_json returns.
+
+ The payload is {'context': ...} merged with `query`, JSON-encoded and
+ handed to _download_json as `data`.
+
+ ep       -- endpoint name appended to https://www.youtube.com/youtubei/v1/
+ query    -- dict merged into the payload
+ video_id, fatal, note, errnote -- forwarded unchanged to _download_json
+ headers  -- optional extra request headers; they override generated ones
+ context  -- client context to send; when falsy, self._extract_context()
+             is used instead
+ api_key  -- value for the `key` URL parameter; when falsy,
+             self._extract_api_key() is used instead
+ """
+
+ data = {'context': context} if context else {'context': self._extract_context()}
+ # query is merged last, so a 'context' key inside query replaces the one chosen above
data.update(query)
- headers = headers or {}
- headers.update({'content-type': 'application/json'})
- auth = self._generate_sapisidhash_header()
- if auth is not None:
- headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
+ real_headers = self._generate_api_headers()
+ real_headers.update({'content-type': 'application/json'})
+ # Caller-supplied headers are applied last so they take precedence
+ if headers:
+ real_headers.update(headers)
return self._download_json(
'https://www.youtube.com/youtubei/v1/%s' % ep,
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
- data=json.dumps(data).encode('utf8'), headers=headers,
- query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
+ data=json.dumps(data).encode('utf8'), headers=real_headers,
+ query={'key': api_key or self._extract_api_key()})
+
+ def _extract_api_key(self, ytcfg=None):
+     """Return the Innertube API key to send with API requests.
+
+     Prefers the 'INNERTUBE_API_KEY' entry of ytcfg and otherwise falls
+     back to the built-in _YT_INNERTUBE_API_KEY.  Assumes try_get (defined
+     outside this block) yields None when the lookup fails or the value
+     is not a compat_str -- TODO confirm.
+     """
+     page_key = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'], compat_str)
+     if page_key:
+         return page_key
+     return self._YT_INNERTUBE_API_KEY
def _extract_yt_initial_data(self, video_id, webpage):
return self._parse_json(
return self._parse_json(
self._search_regex(
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
- default='{}'), video_id, fatal=False)
+ default='{}'), video_id, fatal=False) or {}
+
+ def __extract_client_version(self, ytcfg):
+     """Return the client version to report to the API.
+
+     Prefers the 'INNERTUBE_CLIENT_VERSION' entry of ytcfg and otherwise
+     falls back to the built-in _YT_WEB_CLIENT_VERSION.
+
+     NOTE(review): the double leading underscore is subject to Python
+     name mangling when defined in a class body, so code in subclasses
+     cannot reach this as self.__extract_client_version -- confirm that
+     is intended.
+     """
+     page_version = try_get(
+         ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'], compat_str)
+     if page_version:
+         return page_version
+     return self._YT_WEB_CLIENT_VERSION
+
+ def _extract_context(self, ytcfg=None):
+     """Return the client context dict to embed in an API payload.
+
+     When ytcfg carries a non-empty 'INNERTUBE_CONTEXT' dict, that object
+     is returned as-is (not copied).  Otherwise a minimal context is
+     built from the client name (default 'WEB'), the client version and,
+     when present in ytcfg, 'VISITOR_DATA'.
+     """
+     ready_made = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CONTEXT'], dict)
+     if ready_made:
+         return ready_made
+
+     # No usable context on the page: recreate the (required) client section
+     version = self.__extract_client_version(ytcfg)
+     name = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
+     client = {
+         'clientName': name,
+         'clientVersion': version,
+     }
+     visitor = try_get(ytcfg, lambda cfg: cfg['VISITOR_DATA'], compat_str)
+     if visitor:
+         client['visitorData'] = visitor
+     return {'client': client}
+
+ def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
+     """Build the HTTP headers for an API request.
+
+     Always sets the client name ('1') and client version headers.  The
+     identity token, account sync id (together with X-Goog-AuthUser) and
+     visitor id headers are added only when the matching argument is
+     truthy.  Authorization and X-Origin are added when
+     _generate_sapisidhash_header() returns something other than None.
+     """
+     api_headers = {
+         'X-YouTube-Client-Name': '1',
+         'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
+     }
+     if identity_token:
+         api_headers.update({'x-youtube-identity-token': identity_token})
+     if account_syncid:
+         api_headers.update({
+             'X-Goog-PageId': account_syncid,
+             'X-Goog-AuthUser': 0,
+         })
+     if visitor_data:
+         api_headers.update({'x-goog-visitor-id': visitor_data})
+     sapisid_auth = self._generate_sapisidhash_header()
+     if sapisid_auth is not None:
+         api_headers.update({
+             'Authorization': sapisid_auth,
+             'X-Origin': 'https://www.youtube.com',
+         })
+     return api_headers
def _extract_video(self, renderer):
video_id = renderer.get('videoId')
}
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
- session_token_list, parent=None, comment_counts=None):
+ ytcfg, session_token_list, parent=None, comment_counts=None):
def extract_thread(parent_renderer):
contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
if comment_replies_renderer:
comment_counts[2] += 1
comment_entries_iter = self._comment_entries(
- comment_replies_renderer, identity_token, account_syncid,
+ comment_replies_renderer, identity_token, account_syncid, ytcfg,
parent=comment.get('id'), session_token_list=session_token_list,
comment_counts=comment_counts)
if not comment_counts:
# comment so far, est. total comments, current comment thread #
comment_counts = [0, 0, 0]
- headers = self._DEFAULT_BASIC_API_HEADERS.copy()
# TODO: Generalize the download code with TabIE
- if identity_token:
- headers['x-youtube-identity-token'] = identity_token
-
- if account_syncid:
- headers['X-Goog-PageId'] = account_syncid
- headers['X-Goog-AuthUser'] = 0
-
+ context = self._extract_context(ytcfg)
+ visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
continuation = YoutubeTabIE._extract_continuation(root_continuation_data) # TODO
first_continuation = False
if parent is None:
for page_num in itertools.count(0):
if not continuation:
break
+ headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
retries = self._downloader.params.get('extractor_retries', 3)
count = -1
last_error = None
if not response:
break
+ visitor_data = try_get(
+ response,
+ lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
+ compat_str) or visitor_data
known_continuation_renderers = {
'itemSectionContinuation': extract_thread,
renderer,
identity_token=self._extract_identity_token(webpage, item_id=video_id),
account_syncid=self._extract_account_syncid(ytcfg),
+ ytcfg=ytcfg,
session_token_list=[xsrf_token])
for comment in comment_iter:
player_response = self._extract_yt_initial_variable(
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
video_id, 'initial player response')
+
+ ytcfg = self._extract_ytcfg(video_id, webpage)
if not player_response:
player_response = self._call_api(
- 'player', {'videoId': video_id}, video_id)
+ 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
playability_status = player_response.get('playabilityStatus') or {}
if playability_status.get('reason') == 'Sign in to confirm your age':
'yt initial data')
if not initial_data:
initial_data = self._call_api(
- 'next', {'videoId': video_id}, video_id, fatal=False)
+ 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
if not is_live:
try:
ctp = continuation_ep.get('clickTrackingParams')
return YoutubeTabIE._build_continuation_query(continuation, ctp)
- def _entries(self, tab, item_id, identity_token, account_syncid):
+ def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
for entry in extract_entries(parent_renderer):
yield entry
continuation = continuation_list[0]
-
- headers = {
- 'x-youtube-client-name': '1',
- 'x-youtube-client-version': '2.20201112.04.01',
- }
- if identity_token:
- headers['x-youtube-identity-token'] = identity_token
-
- if account_syncid:
- headers['X-Goog-PageId'] = account_syncid
- headers['X-Goog-AuthUser'] = 0
+ context = self._extract_context(ytcfg)
+ visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
for page_num in itertools.count(1):
if not continuation:
break
+ headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
retries = self._downloader.params.get('extractor_retries', 3)
count = -1
last_error = None
'continuation': continuation['continuation'],
'clickTracking': {'clickTrackingParams': continuation['itct']},
},
+ context=context,
+ api_key=self._extract_api_key(ytcfg),
note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
if not response:
break
+ visitor_data = try_get(
+ response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
known_continuation_renderers = {
'playlistVideoListContinuation': self._playlist_entries,
self._entries(
selected_tab, playlist_id,
self._extract_identity_token(webpage, item_id),
- self._extract_account_syncid(data)),
+ self._extract_account_syncid(data),
+ self._extract_ytcfg(item_id, webpage)),
**metadata)
def _extract_mix_playlist(self, playlist, playlist_id):