]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[youtube] Improve age-gate detection (#577)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 network_exceptions,
43 orderedSet,
44 parse_codecs,
45 parse_count,
46 parse_duration,
47 parse_iso8601,
48 qualities,
49 remove_start,
50 smuggle_url,
51 str_or_none,
52 str_to_int,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unsmuggle_url,
58 update_url_query,
59 url_or_none,
60 urlencode_postdata,
61 urljoin,
62 variadic,
63 )
64
65
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
68
69
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account login endpoints - only referenced by the disabled
    # username/password flow in _login(); cookie login is the supported path
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # First URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs, including the special WL/LL/LM virtual playlists
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
89
90 def _login(self):
91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
108 username, password = self._get_login_info()
109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
112
113 # Everything below this is broken!
114 r'''
115 # No authentication to be performed
116 if username is None:
117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
121 return True
122
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
127 if login_page is False:
128 return
129
130 login_form = self._hidden_inputs(login_page)
131
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
140 'f.req': json.dumps(f_req),
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
145 })
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
168 lookup_results = req(
169 self._LOOKUP_URL, lookup_req,
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
174
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
187
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
191
192 if challenge_results is False:
193 return
194
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
214 status = try_get(login_challenge, lambda x: x[5], compat_str)
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
275
276 check_cookie_results = self._download_webpage(
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
281
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
284 return False
285
286 return True
287 '''
288
289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
303
304 def _real_initialize(self):
305 self._initialize_consent()
306 if self._downloader is None:
307 return
308 if not self._login():
309 return
310
    # Regexes locating the JSON configuration blobs embedded in HTML pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in fallback ytcfg values for each supported Innertube client,
    # used when a real ytcfg cannot be extracted from the page.
    # NOTE: the *_AGEGATE variants differ from their base client only by
    # 'clientScreen': 'EMBED'.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        },
        'IOS': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 5
        },
        'IOS_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS',
                    'clientVersion': '16.20',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 5
        },
        'IOS_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 26
        },
        'IOS_MESSAGES_EXTENSION': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MESSAGES_EXTENSION',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 66
        },
        'MWEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'MWEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'MWEB',
                    'clientVersion': '2.20210721.07.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 2
        },
        'MWEB_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'MWEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'MWEB',
                    'clientVersion': '2.20210721.07.00',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 2
        },
    }

    # API hostname per client; clients not listed here use the WEB host
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }

    # clients starting with _ cannot be explicitly requested by the user
    _YT_CLIENTS = {
        'android': 'ANDROID',
        'android_music': 'ANDROID_MUSIC',
        'android_embedded': 'ANDROID_EMBEDDED_PLAYER',
        'android_agegate': 'ANDROID_AGEGATE',
        'ios': 'IOS',
        'ios_music': 'IOS_MUSIC',
        'ios_embedded': 'IOS_MESSAGES_EXTENSION',
        'ios_agegate': 'IOS_AGEGATE',
        'web': 'WEB',
        'web_music': 'WEB_REMIX',
        'web_embedded': 'WEB_EMBEDDED_PLAYER',
        'web_agegate': 'WEB_AGEGATE',
        'mweb': 'MWEB',
        'mweb_agegate': 'MWEB_AGEGATE',
    }
542
543 def _get_default_ytcfg(self, client='WEB'):
544 if client in self._YT_DEFAULT_YTCFGS:
545 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
546 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
547 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
548
549 def _get_innertube_host(self, client='WEB'):
550 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
551
552 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
553 # try_get but with fallback to default ytcfg client values when present
554 _func = lambda y: try_get(y, getter, expected_type)
555 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
556
    def _extract_client_name(self, ytcfg, default_client='WEB'):
        # Client name from ytcfg, falling back to the built-in default ytcfg
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
559
560 @staticmethod
561 def _extract_session_index(*data):
562 for ytcfg in data:
563 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
564 if session_index is not None:
565 return session_index
566
    def _extract_client_version(self, ytcfg, default_client='WEB'):
        # Client version from ytcfg, falling back to the built-in default ytcfg
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
569
    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        # API key from ytcfg, falling back to the built-in default ytcfg
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
572
    def _extract_context(self, ytcfg=None, default_client='WEB'):
        """Return the INNERTUBE_CONTEXT dict from *ytcfg*, or a default one.

        When ytcfg is present but lacks a full context, the default context
        is patched with the client name/version (and visitorData) that can
        still be extracted from it.
        """
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context
592
    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <time>_<sha1>' Authorization header value.

        Returns None when neither the SAPISID nor __Secure-3PAPISID cookie
        is available. As a side effect, copies __Secure-3PAPISID to SAPISID
        when the latter is missing.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None or not sapisid_cookie.value:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        self.write_debug('Extracted SAPISID cookie', only_once=True)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
612
613 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
614 note='Downloading API JSON', errnote='Unable to download API page',
615 context=None, api_key=None, api_hostname=None, default_client='WEB'):
616
617 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
618 data.update(query)
619 real_headers = self.generate_api_headers(default_client=default_client)
620 real_headers.update({'content-type': 'application/json'})
621 if headers:
622 real_headers.update(headers)
623 return self._download_json(
624 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
625 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
626 data=json.dumps(data).encode('utf8'), headers=real_headers,
627 query={'key': api_key or self._extract_api_key()})
628
    def extract_yt_initial_data(self, video_id, webpage):
        # Parse the ytInitialData JSON blob out of a page. Try the pattern
        # anchored by the boundary regex first, then the bare pattern.
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
635
636 def _extract_identity_token(self, webpage, item_id):
637 if not webpage:
638 return None
639 ytcfg = self.extract_ytcfg(item_id, webpage)
640 if ytcfg:
641 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
642 if token:
643 return token
644 return self._search_regex(
645 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
646 'identity token', default=None)
647
    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg

        Returns None when no sync id can be found in any argument.
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]
666
667 def extract_ytcfg(self, video_id, webpage):
668 if not webpage:
669 return {}
670 return self._parse_json(
671 self._search_regex(
672 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
673 default='{}'), video_id, fatal=False) or {}
674
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
        """Build the HTTP headers for an Innertube API request.

        Always includes client name/version and Origin; conditionally adds
        identity/account/session/visitor headers and, when cookies allow it,
        the SAPISIDHASH Authorization header.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Fall back to the visitorData embedded in the ytcfg context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        # X-Goog-AuthUser defaults to 0 when only an account_syncid is known
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
703
704 @staticmethod
705 def _build_api_continuation_query(continuation, ctp=None):
706 query = {
707 'continuation': continuation
708 }
709 # TODO: Inconsistency with clickTrackingParams.
710 # Currently we have a fixed ctp contained within context (from ytcfg)
711 # and a ctp in root query for continuation.
712 if ctp:
713 query['clickTracking'] = {'clickTrackingParams': ctp}
714 return query
715
716 @classmethod
717 def _extract_next_continuation_data(cls, renderer):
718 next_continuation = try_get(
719 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
720 lambda x: x['continuation']['reloadContinuationData']), dict)
721 if not next_continuation:
722 return
723 continuation = next_continuation.get('continuation')
724 if not continuation:
725 return
726 ctp = next_continuation.get('clickTrackingParams')
727 return cls._build_api_continuation_query(continuation, ctp)
728
729 @classmethod
730 def _extract_continuation_ep_data(cls, continuation_ep: dict):
731 if isinstance(continuation_ep, dict):
732 continuation = try_get(
733 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
734 if not continuation:
735 return
736 ctp = continuation_ep.get('clickTrackingParams')
737 return cls._build_api_continuation_query(continuation, ctp)
738
739 @classmethod
740 def _extract_continuation(cls, renderer):
741 next_continuation = cls._extract_next_continuation_data(renderer)
742 if next_continuation:
743 return next_continuation
744
745 contents = []
746 for key in ('contents', 'items'):
747 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
748
749 for content in contents:
750 if not isinstance(content, dict):
751 continue
752 continuation_ep = try_get(
753 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
754 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
755 dict)
756 continuation = cls._extract_continuation_ep_data(continuation_ep)
757 if continuation:
758 return continuation
759
760 @classmethod
761 def _extract_alerts(cls, data):
762 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
763 if not isinstance(alert_dict, dict):
764 continue
765 for alert in alert_dict.values():
766 alert_type = alert.get('type')
767 if not alert_type:
768 continue
769 message = cls._get_text(alert, 'text')
770 if message:
771 yield alert_type, message
772
773 def _report_alerts(self, alerts, expected=True):
774 errors = []
775 warnings = []
776 for alert_type, alert_message in alerts:
777 if alert_type.lower() == 'error':
778 errors.append([alert_type, alert_message])
779 else:
780 warnings.append([alert_type, alert_message])
781
782 for alert_type, alert_message in (warnings + errors[:-1]):
783 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
784 if errors:
785 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
786
    def _extract_and_report_alerts(self, data, *args, **kwargs):
        # Convenience wrapper: parse alerts from *data*, then warn/raise on them
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
789
790 def _extract_badges(self, renderer: dict):
791 badges = set()
792 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
793 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
794 if label:
795 badges.add(label.lower())
796 return badges
797
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract a text string from YouTube's renderer objects.

        Each path in *path_list* (or *data* itself when no paths are given) is
        resolved with traverse_obj; the first object yielding a 'simpleText'
        value or a joinable 'runs' list wins. *max_runs* limits how many run
        fragments are concatenated. Returns None when nothing matches.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Branching paths (Ellipsis or tuple keys) already yield a list
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
819
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the Innertube API endpoint *ep* with retries.

        Retries (up to the 'extractor_retries' param, default 3) on network
        errors other than HTTP 403/429, and on responses that contain none of
        *check_get_keys*. Raises when *fatal*, otherwise warns and returns None.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
878
879 @staticmethod
880 def is_music_url(url):
881 return re.match(r'https?://music\.youtube\.com/', url) is not None
882
883 def _extract_video(self, renderer):
884 video_id = renderer.get('videoId')
885 title = self._get_text(renderer, 'title')
886 description = self._get_text(renderer, 'descriptionSnippet')
887 duration = parse_duration(self._get_text(
888 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
889 view_count_text = self._get_text(renderer, 'viewCountText') or ''
890 view_count = str_to_int(self._search_regex(
891 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
892 'view count', default=None))
893
894 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
895
896 return {
897 '_type': 'url',
898 'ie_key': YoutubeIE.ie_key(),
899 'id': video_id,
900 'url': video_id,
901 'title': title,
902 'description': description,
903 'duration': duration,
904 'view_count': view_count,
905 'uploader': uploader,
906 }
907
908
909 class YoutubeIE(YoutubeBaseInfoExtractor):
910 IE_DESC = 'YouTube.com'
911 _INVIDIOUS_SITES = (
912 # invidious-redirect websites
913 r'(?:www\.)?redirect\.invidious\.io',
914 r'(?:(?:www|dev)\.)?invidio\.us',
915 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
916 r'(?:www\.)?invidious\.pussthecat\.org',
917 r'(?:www\.)?invidious\.zee\.li',
918 r'(?:www\.)?invidious\.ethibox\.fr',
919 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
920 # youtube-dl invidious instances list
921 r'(?:(?:www|no)\.)?invidiou\.sh',
922 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
923 r'(?:www\.)?invidious\.kabi\.tk',
924 r'(?:www\.)?invidious\.mastodon\.host',
925 r'(?:www\.)?invidious\.zapashcanon\.fr',
926 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
927 r'(?:www\.)?invidious\.tinfoil-hat\.net',
928 r'(?:www\.)?invidious\.himiko\.cloud',
929 r'(?:www\.)?invidious\.reallyancient\.tech',
930 r'(?:www\.)?invidious\.tube',
931 r'(?:www\.)?invidiou\.site',
932 r'(?:www\.)?invidious\.site',
933 r'(?:www\.)?invidious\.xyz',
934 r'(?:www\.)?invidious\.nixnet\.xyz',
935 r'(?:www\.)?invidious\.048596\.xyz',
936 r'(?:www\.)?invidious\.drycat\.fr',
937 r'(?:www\.)?inv\.skyn3t\.in',
938 r'(?:www\.)?tube\.poal\.co',
939 r'(?:www\.)?tube\.connect\.cafe',
940 r'(?:www\.)?vid\.wxzm\.sx',
941 r'(?:www\.)?vid\.mint\.lgbt',
942 r'(?:www\.)?vid\.puffyan\.us',
943 r'(?:www\.)?yewtu\.be',
944 r'(?:www\.)?yt\.elukerio\.org',
945 r'(?:www\.)?yt\.lelux\.fi',
946 r'(?:www\.)?invidious\.ggc-project\.de',
947 r'(?:www\.)?yt\.maisputain\.ovh',
948 r'(?:www\.)?ytprivate\.com',
949 r'(?:www\.)?invidious\.13ad\.de',
950 r'(?:www\.)?invidious\.toot\.koeln',
951 r'(?:www\.)?invidious\.fdn\.fr',
952 r'(?:www\.)?watch\.nettohikari\.com',
953 r'(?:www\.)?invidious\.namazso\.eu',
954 r'(?:www\.)?invidious\.silkky\.cloud',
955 r'(?:www\.)?invidious\.exonip\.de',
956 r'(?:www\.)?invidious\.riverside\.rocks',
957 r'(?:www\.)?invidious\.blamefran\.net',
958 r'(?:www\.)?invidious\.moomoo\.de',
959 r'(?:www\.)?ytb\.trom\.tf',
960 r'(?:www\.)?yt\.cyberhost\.uk',
961 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
962 r'(?:www\.)?qklhadlycap4cnod\.onion',
963 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
964 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
965 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
966 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
967 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
968 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
969 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
970 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
971 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
972 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
973 )
974 _VALID_URL = r"""(?x)^
975 (
976 (?:https?://|//) # http(s):// or protocol-independent URL
977 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
978 (?:www\.)?deturl\.com/www\.youtube\.com|
979 (?:www\.)?pwnyoutube\.com|
980 (?:www\.)?hooktube\.com|
981 (?:www\.)?yourepeat\.com|
982 tube\.majestyc\.net|
983 %(invidious)s|
984 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
985 (?:.*?\#/)? # handle anchor (#/) redirect urls
986 (?: # the various things that can precede the ID:
987 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
988 |(?: # or the v= param in all its forms
989 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
990 (?:\?|\#!?) # the params delimiter ? or # or #!
991 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
992 v=
993 )
994 ))
995 |(?:
996 youtu\.be| # just youtu.be/xxxx
997 vid\.plus| # or vid.plus/xxxx
998 zwearz\.com/watch| # or zwearz.com/watch/xxxx
999 %(invidious)s
1000 )/
1001 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1002 )
1003 )? # all until now is optional -> you can pass the naked ID
1004 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1005 (?(1).+)? # if we found the ID, everything can follow
1006 (?:\#|$)""" % {
1007 'invidious': '|'.join(_INVIDIOUS_SITES),
1008 }
    # Patterns extracting a player id/version (named group "id") from the URL
    # of the player JavaScript file — presumably used to identify the player
    # build for signature handling; usage is outside this chunk, confirm there.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
1014 _formats = {
1015 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1016 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1017 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1018 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1019 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1020 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1021 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1022 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1023 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1024 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1025 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1026 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1027 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1028 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1029 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1030 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1031 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1032 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1033
1034
1035 # 3D videos
1036 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1037 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1038 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1039 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1040 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1041 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1042 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1043
1044 # Apple HTTP Live Streaming
1045 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1046 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1047 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1048 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1049 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1050 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1051 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1052 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1053
1054 # DASH mp4 video
1055 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1056 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1057 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1058 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1059 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1060 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1061 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1062 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1063 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1064 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1065 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1066 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1067
1068 # Dash mp4 audio
1069 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1070 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1071 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1072 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1073 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1074 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1075 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1076
1077 # Dash webm
1078 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1079 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1080 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1081 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1082 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1083 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1084 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1085 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1086 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1087 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1088 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1089 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1090 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1091 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1092 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1093 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1094 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1095 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1096 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1097 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1098 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1099 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1100
1101 # Dash webm audio
1102 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1103 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1104
1105 # Dash webm audio with opus inside
1106 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1107 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1108 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1109
1110 # RTMP (unnamed)
1111 '_rtmp': {'protocol': 'rtmp'},
1112
1113 # av01 video only formats sometimes served with "unknown" codecs
1114 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1115 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1116 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1117 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1118 }
    # Subtitle/caption formats known to be served by YouTube; how the order is
    # interpreted (e.g. as a preference ranking) is not visible in this chunk.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1120
    # Human-readable playability "reason" strings that indicate an age-gated
    # video (see #577 "Improve age-gate detection").
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.',
        'Please confirm your age.')

    # Machine-readable status values that likewise signal an age check;
    # presumably matched against the player response's playability status —
    # confirm against the extraction code (outside this chunk).
    _AGE_GATE_STATUS_REASONS = (
        'AGE_VERIFICATION_REQUIRED',
        'AGE_CHECK_REQUIRED'
    )
1131
    # Disable the generic InfoExtractor geo-bypass machinery for this
    # extractor — NOTE(review): presumably geo handling is done elsewhere
    # in this extractor; confirm before relying on it.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1135 _TESTS = [
1136 {
1137 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1138 'info_dict': {
1139 'id': 'BaW_jenozKc',
1140 'ext': 'mp4',
1141 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1142 'uploader': 'Philipp Hagemeister',
1143 'uploader_id': 'phihag',
1144 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1145 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1146 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1147 'upload_date': '20121002',
1148 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1149 'categories': ['Science & Technology'],
1150 'tags': ['youtube-dl'],
1151 'duration': 10,
1152 'view_count': int,
1153 'like_count': int,
1154 'dislike_count': int,
1155 'start_time': 1,
1156 'end_time': 9,
1157 }
1158 },
1159 {
1160 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1161 'note': 'Embed-only video (#1746)',
1162 'info_dict': {
1163 'id': 'yZIXLfi8CZQ',
1164 'ext': 'mp4',
1165 'upload_date': '20120608',
1166 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1167 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1168 'uploader': 'SET India',
1169 'uploader_id': 'setindia',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1171 'age_limit': 18,
1172 },
1173 'skip': 'Private video',
1174 },
1175 {
1176 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1177 'note': 'Use the first video ID in the URL',
1178 'info_dict': {
1179 'id': 'BaW_jenozKc',
1180 'ext': 'mp4',
1181 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1182 'uploader': 'Philipp Hagemeister',
1183 'uploader_id': 'phihag',
1184 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1185 'upload_date': '20121002',
1186 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1187 'categories': ['Science & Technology'],
1188 'tags': ['youtube-dl'],
1189 'duration': 10,
1190 'view_count': int,
1191 'like_count': int,
1192 'dislike_count': int,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
1198 {
1199 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1200 'note': '256k DASH audio (format 141) via DASH manifest',
1201 'info_dict': {
1202 'id': 'a9LDPn-MO4I',
1203 'ext': 'm4a',
1204 'upload_date': '20121002',
1205 'uploader_id': '8KVIDEO',
1206 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1207 'description': '',
1208 'uploader': '8KVIDEO',
1209 'title': 'UHDTV TEST 8K VIDEO.mp4'
1210 },
1211 'params': {
1212 'youtube_include_dash_manifest': True,
1213 'format': '141',
1214 },
1215 'skip': 'format 141 not served anymore',
1216 },
1217 # DASH manifest with encrypted signature
1218 {
1219 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1220 'info_dict': {
1221 'id': 'IB3lcPjvWLA',
1222 'ext': 'm4a',
1223 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1224 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1225 'duration': 244,
1226 'uploader': 'AfrojackVEVO',
1227 'uploader_id': 'AfrojackVEVO',
1228 'upload_date': '20131011',
1229 'abr': 129.495,
1230 },
1231 'params': {
1232 'youtube_include_dash_manifest': True,
1233 'format': '141/bestaudio[ext=m4a]',
1234 },
1235 },
1236 # Normal age-gate video (embed allowed)
1237 {
1238 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1239 'info_dict': {
1240 'id': 'HtVdAasjOgU',
1241 'ext': 'mp4',
1242 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1243 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1244 'duration': 142,
1245 'uploader': 'The Witcher',
1246 'uploader_id': 'WitcherGame',
1247 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1248 'upload_date': '20140605',
1249 'age_limit': 18,
1250 },
1251 },
1252 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1253 # YouTube Red ad is not captured for creator
1254 {
1255 'url': '__2ABJjxzNo',
1256 'info_dict': {
1257 'id': '__2ABJjxzNo',
1258 'ext': 'mp4',
1259 'duration': 266,
1260 'upload_date': '20100430',
1261 'uploader_id': 'deadmau5',
1262 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1263 'creator': 'deadmau5',
1264 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1265 'uploader': 'deadmau5',
1266 'title': 'Deadmau5 - Some Chords (HD)',
1267 'alt_title': 'Some Chords',
1268 },
1269 'expected_warnings': [
1270 'DASH manifest missing',
1271 ]
1272 },
1273 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1274 {
1275 'url': 'lqQg6PlCWgI',
1276 'info_dict': {
1277 'id': 'lqQg6PlCWgI',
1278 'ext': 'mp4',
1279 'duration': 6085,
1280 'upload_date': '20150827',
1281 'uploader_id': 'olympic',
1282 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1283 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1284 'uploader': 'Olympics',
1285 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1286 },
1287 'params': {
1288 'skip_download': 'requires avconv',
1289 }
1290 },
1291 # Non-square pixels
1292 {
1293 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1294 'info_dict': {
1295 'id': '_b-2C3KPAM0',
1296 'ext': 'mp4',
1297 'stretched_ratio': 16 / 9.,
1298 'duration': 85,
1299 'upload_date': '20110310',
1300 'uploader_id': 'AllenMeow',
1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1302 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1303 'uploader': '孫ᄋᄅ',
1304 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1305 },
1306 },
1307 # url_encoded_fmt_stream_map is empty string
1308 {
1309 'url': 'qEJwOuvDf7I',
1310 'info_dict': {
1311 'id': 'qEJwOuvDf7I',
1312 'ext': 'webm',
1313 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1314 'description': '',
1315 'upload_date': '20150404',
1316 'uploader_id': 'spbelect',
1317 'uploader': 'Наблюдатели Петербурга',
1318 },
1319 'params': {
1320 'skip_download': 'requires avconv',
1321 },
1322 'skip': 'This live event has ended.',
1323 },
1324 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1325 {
1326 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1327 'info_dict': {
1328 'id': 'FIl7x6_3R5Y',
1329 'ext': 'webm',
1330 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1331 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1332 'duration': 220,
1333 'upload_date': '20150625',
1334 'uploader_id': 'dorappi2000',
1335 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1336 'uploader': 'dorappi2000',
1337 'formats': 'mincount:31',
1338 },
1339 'skip': 'not actual anymore',
1340 },
1341 # DASH manifest with segment_list
1342 {
1343 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1344 'md5': '8ce563a1d667b599d21064e982ab9e31',
1345 'info_dict': {
1346 'id': 'CsmdDsKjzN8',
1347 'ext': 'mp4',
1348 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1349 'uploader': 'Airtek',
1350 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1351 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1352 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1353 },
1354 'params': {
1355 'youtube_include_dash_manifest': True,
1356 'format': '135', # bestvideo
1357 },
1358 'skip': 'This live event has ended.',
1359 },
1360 {
1361 # Multifeed videos (multiple cameras), URL is for Main Camera
1362 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1363 'info_dict': {
1364 'id': 'jvGDaLqkpTg',
1365 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1366 'description': 'md5:e03b909557865076822aa169218d6a5d',
1367 },
1368 'playlist': [{
1369 'info_dict': {
1370 'id': 'jvGDaLqkpTg',
1371 'ext': 'mp4',
1372 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1373 'description': 'md5:e03b909557865076822aa169218d6a5d',
1374 'duration': 10643,
1375 'upload_date': '20161111',
1376 'uploader': 'Team PGP',
1377 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1379 },
1380 }, {
1381 'info_dict': {
1382 'id': '3AKt1R1aDnw',
1383 'ext': 'mp4',
1384 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1385 'description': 'md5:e03b909557865076822aa169218d6a5d',
1386 'duration': 10991,
1387 'upload_date': '20161111',
1388 'uploader': 'Team PGP',
1389 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1391 },
1392 }, {
1393 'info_dict': {
1394 'id': 'RtAMM00gpVc',
1395 'ext': 'mp4',
1396 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1397 'description': 'md5:e03b909557865076822aa169218d6a5d',
1398 'duration': 10995,
1399 'upload_date': '20161111',
1400 'uploader': 'Team PGP',
1401 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1402 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1403 },
1404 }, {
1405 'info_dict': {
1406 'id': '6N2fdlP3C5U',
1407 'ext': 'mp4',
1408 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1409 'description': 'md5:e03b909557865076822aa169218d6a5d',
1410 'duration': 10990,
1411 'upload_date': '20161111',
1412 'uploader': 'Team PGP',
1413 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1414 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1415 },
1416 }],
1417 'params': {
1418 'skip_download': True,
1419 },
1420 },
1421 {
1422 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1423 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1424 'info_dict': {
1425 'id': 'gVfLd0zydlo',
1426 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1427 },
1428 'playlist_count': 2,
1429 'skip': 'Not multifeed anymore',
1430 },
1431 {
1432 'url': 'https://vid.plus/FlRa-iH7PGw',
1433 'only_matching': True,
1434 },
1435 {
1436 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1437 'only_matching': True,
1438 },
1439 {
1440 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1441 # Also tests cut-off URL expansion in video description (see
1442 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1443 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1444 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1445 'info_dict': {
1446 'id': 'lsguqyKfVQg',
1447 'ext': 'mp4',
1448 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1449 'alt_title': 'Dark Walk',
1450 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1451 'duration': 133,
1452 'upload_date': '20151119',
1453 'uploader_id': 'IronSoulElf',
1454 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1455 'uploader': 'IronSoulElf',
1456 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1457 'track': 'Dark Walk',
1458 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1459 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1460 },
1461 'params': {
1462 'skip_download': True,
1463 },
1464 },
1465 {
1466 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1467 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1468 'only_matching': True,
1469 },
1470 {
1471 # Video with yt:stretch=17:0
1472 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1473 'info_dict': {
1474 'id': 'Q39EVAstoRM',
1475 'ext': 'mp4',
1476 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1477 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1478 'upload_date': '20151107',
1479 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1480 'uploader': 'CH GAMER DROID',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 },
1485 'skip': 'This video does not exist.',
1486 },
1487 {
1488 # Video with incomplete 'yt:stretch=16:'
1489 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1490 'only_matching': True,
1491 },
1492 {
1493 # Video licensed under Creative Commons
1494 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1495 'info_dict': {
1496 'id': 'M4gD1WSo5mA',
1497 'ext': 'mp4',
1498 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1499 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1500 'duration': 721,
1501 'upload_date': '20150127',
1502 'uploader_id': 'BerkmanCenter',
1503 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1504 'uploader': 'The Berkman Klein Center for Internet & Society',
1505 'license': 'Creative Commons Attribution license (reuse allowed)',
1506 },
1507 'params': {
1508 'skip_download': True,
1509 },
1510 },
1511 {
1512 # Channel-like uploader_url
1513 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1514 'info_dict': {
1515 'id': 'eQcmzGIKrzg',
1516 'ext': 'mp4',
1517 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1518 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1519 'duration': 4060,
1520 'upload_date': '20151119',
1521 'uploader': 'Bernie Sanders',
1522 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1523 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1524 'license': 'Creative Commons Attribution license (reuse allowed)',
1525 },
1526 'params': {
1527 'skip_download': True,
1528 },
1529 },
1530 {
1531 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1532 'only_matching': True,
1533 },
1534 {
1535 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1536 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1537 'only_matching': True,
1538 },
1539 {
1540 # Rental video preview
1541 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1542 'info_dict': {
1543 'id': 'uGpuVWrhIzE',
1544 'ext': 'mp4',
1545 'title': 'Piku - Trailer',
1546 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1547 'upload_date': '20150811',
1548 'uploader': 'FlixMatrix',
1549 'uploader_id': 'FlixMatrixKaravan',
1550 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1551 'license': 'Standard YouTube License',
1552 },
1553 'params': {
1554 'skip_download': True,
1555 },
1556 'skip': 'This video is not available.',
1557 },
1558 {
1559 # YouTube Red video with episode data
1560 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1561 'info_dict': {
1562 'id': 'iqKdEhx-dD4',
1563 'ext': 'mp4',
1564 'title': 'Isolation - Mind Field (Ep 1)',
1565 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1566 'duration': 2085,
1567 'upload_date': '20170118',
1568 'uploader': 'Vsauce',
1569 'uploader_id': 'Vsauce',
1570 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1571 'series': 'Mind Field',
1572 'season_number': 1,
1573 'episode_number': 1,
1574 },
1575 'params': {
1576 'skip_download': True,
1577 },
1578 'expected_warnings': [
1579 'Skipping DASH manifest',
1580 ],
1581 },
1582 {
1583 # The following content has been identified by the YouTube community
1584 # as inappropriate or offensive to some audiences.
1585 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1586 'info_dict': {
1587 'id': '6SJNVb0GnPI',
1588 'ext': 'mp4',
1589 'title': 'Race Differences in Intelligence',
1590 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1591 'duration': 965,
1592 'upload_date': '20140124',
1593 'uploader': 'New Century Foundation',
1594 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1595 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1596 },
1597 'params': {
1598 'skip_download': True,
1599 },
1600 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1601 },
1602 {
1603 # itag 212
1604 'url': '1t24XAntNCY',
1605 'only_matching': True,
1606 },
1607 {
1608 # geo restricted to JP
1609 'url': 'sJL6WA-aGkQ',
1610 'only_matching': True,
1611 },
1612 {
1613 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1614 'only_matching': True,
1615 },
1616 {
1617 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1618 'only_matching': True,
1619 },
1620 {
1621 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1622 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1623 'only_matching': True,
1624 },
1625 {
1626 # DRM protected
1627 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1628 'only_matching': True,
1629 },
1630 {
1631 # Video with unsupported adaptive stream type formats
1632 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1633 'info_dict': {
1634 'id': 'Z4Vy8R84T1U',
1635 'ext': 'mp4',
1636 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1637 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1638 'duration': 433,
1639 'upload_date': '20130923',
1640 'uploader': 'Amelia Putri Harwita',
1641 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1642 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1643 'formats': 'maxcount:10',
1644 },
1645 'params': {
1646 'skip_download': True,
1647 'youtube_include_dash_manifest': False,
1648 },
1649 'skip': 'not actual anymore',
1650 },
1651 {
1652 # Youtube Music Auto-generated description
1653 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1654 'info_dict': {
1655 'id': 'MgNrAu2pzNs',
1656 'ext': 'mp4',
1657 'title': 'Voyeur Girl',
1658 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1659 'upload_date': '20190312',
1660 'uploader': 'Stephen - Topic',
1661 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1662 'artist': 'Stephen',
1663 'track': 'Voyeur Girl',
1664 'album': 'it\'s too much love to know my dear',
1665 'release_date': '20190313',
1666 'release_year': 2019,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
1671 },
1672 {
1673 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1674 'only_matching': True,
1675 },
1676 {
1677 # invalid -> valid video id redirection
1678 'url': 'DJztXj2GPfl',
1679 'info_dict': {
1680 'id': 'DJztXj2GPfk',
1681 'ext': 'mp4',
1682 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1683 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1684 'upload_date': '20090125',
1685 'uploader': 'Prochorowka',
1686 'uploader_id': 'Prochorowka',
1687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1688 'artist': 'Panjabi MC',
1689 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1690 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1691 },
1692 'params': {
1693 'skip_download': True,
1694 },
1695 'skip': 'Video unavailable',
1696 },
1697 {
1698 # empty description results in an empty string
1699 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1700 'info_dict': {
1701 'id': 'x41yOUIvK2k',
1702 'ext': 'mp4',
1703 'title': 'IMG 3456',
1704 'description': '',
1705 'upload_date': '20170613',
1706 'uploader_id': 'ElevageOrVert',
1707 'uploader': 'ElevageOrVert',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 },
1713 {
1714 # with '};' inside yt initial data (see [1])
1715 # see [2] for an example with '};' inside ytInitialPlayerResponse
1716 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1717 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1718 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1719 'info_dict': {
1720 'id': 'CHqg6qOn4no',
1721 'ext': 'mp4',
1722 'title': 'Part 77 Sort a list of simple types in c#',
1723 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1724 'upload_date': '20130831',
1725 'uploader_id': 'kudvenkat',
1726 'uploader': 'kudvenkat',
1727 },
1728 'params': {
1729 'skip_download': True,
1730 },
1731 },
1732 {
1733 # another example of '};' in ytInitialData
1734 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1735 'only_matching': True,
1736 },
1737 {
1738 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1739 'only_matching': True,
1740 },
1741 {
1742 # https://github.com/ytdl-org/youtube-dl/pull/28094
1743 'url': 'OtqTfy26tG0',
1744 'info_dict': {
1745 'id': 'OtqTfy26tG0',
1746 'ext': 'mp4',
1747 'title': 'Burn Out',
1748 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1749 'upload_date': '20141120',
1750 'uploader': 'The Cinematic Orchestra - Topic',
1751 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1752 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1753 'artist': 'The Cinematic Orchestra',
1754 'track': 'Burn Out',
1755 'album': 'Every Day',
1756 'release_data': None,
1757 'release_year': None,
1758 },
1759 'params': {
1760 'skip_download': True,
1761 },
1762 },
1763 {
1764 # controversial video, only works with bpctr when authenticated with cookies
1765 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1766 'only_matching': True,
1767 },
1768 {
1769 # controversial video, requires bpctr/contentCheckOk
1770 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1771 'info_dict': {
1772 'id': 'SZJvDhaSDnc',
1773 'ext': 'mp4',
1774 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1775 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1776 'uploader': 'CBS This Morning',
1777 'uploader_id': 'CBSThisMorning',
1778 'upload_date': '20140716',
1779 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1780 }
1781 },
1782 {
1783 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1784 'url': 'cBvYw8_A0vQ',
1785 'info_dict': {
1786 'id': 'cBvYw8_A0vQ',
1787 'ext': 'mp4',
1788 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1789 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1790 'upload_date': '20201120',
1791 'uploader': 'Walk around Japan',
1792 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1793 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1794 },
1795 'params': {
1796 'skip_download': True,
1797 },
1798 }, {
1799 # Has multiple audio streams
1800 'url': 'WaOKSUlf4TM',
1801 'only_matching': True
1802 }, {
1803 # Requires Premium: has format 141 when requested using YTM url
1804 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1805 'only_matching': True
1806 }, {
1807 # multiple subtitles with same lang_code
1808 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1809 'only_matching': True,
1810 }, {
1811 # Force use android client fallback
1812 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1813 'info_dict': {
1814 'id': 'YOelRv7fMxY',
1815 'title': 'DIGGING A SECRET TUNNEL Part 1',
1816 'ext': '3gp',
1817 'upload_date': '20210624',
1818 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1819 'uploader': 'colinfurze',
1820 'uploader_id': 'colinfurze',
1821 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1822 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1823 },
1824 'params': {
1825 'format': '17', # 3gp format available on android
1826 'extractor_args': {'youtube': {'player_client': ['android']}},
1827 },
1828 },
1829 {
1830 # Skip download of additional client configs (remix client config in this case)
1831 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1832 'only_matching': True,
1833 'params': {
1834 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1835 },
1836 }
1837 ]
1838
1839 @classmethod
1840 def suitable(cls, url):
1841 # Hack for lazy extractors until more generic solution is implemented
1842 # (see #28780)
1843 from .youtube import parse_qs
1844 qs = parse_qs(url)
1845 if qs.get('list', [None])[0]:
1846 return False
1847 return super(YoutubeIE, cls).suitable(url)
1848
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # player id -> downloaded player JS source (filled by _load_player)
        self._code_cache = {}
        # (player_url, signature cache id) -> signature decrypt function
        # (filled by _decrypt_signature)
        self._player_cache = {}
1853
1854 def _extract_player_url(self, ytcfg=None, webpage=None):
1855 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1856 if not player_url and webpage:
1857 player_url = self._search_regex(
1858 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1859 webpage, 'player URL', fatal=False)
1860 if not player_url:
1861 return None
1862 if player_url.startswith('//'):
1863 player_url = 'https:' + player_url
1864 elif not re.match(r'https?://', player_url):
1865 player_url = compat_urlparse.urljoin(
1866 'https://www.youtube.com', player_url)
1867 return player_url
1868
1869 def _signature_cache_id(self, example_sig):
1870 """ Return a string representation of a signature """
1871 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1872
1873 @classmethod
1874 def _extract_player_info(cls, player_url):
1875 for player_re in cls._PLAYER_INFO_RE:
1876 id_m = re.search(player_re, player_url)
1877 if id_m:
1878 break
1879 else:
1880 raise ExtractorError('Cannot identify player %r' % player_url)
1881 return id_m.group('id')
1882
1883 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1884 player_id = self._extract_player_info(player_url)
1885 if player_id not in self._code_cache:
1886 self._code_cache[player_id] = self._download_webpage(
1887 player_url, video_id, fatal=fatal,
1888 note='Downloading player ' + player_id,
1889 errnote='Download of %s failed' % player_url)
1890 return player_id in self._code_cache
1891
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """
        Return a callable that descrambles a signature of the same shape as
        *example_sig*, using the filesystem cache when possible.

        NOTE(review): if _load_player fails this falls through and implicitly
        returns None; the caller (_decrypt_signature) then crashes inside its
        try block and reports the failure - confirm this is intended.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        # cache key encodes player id + signature shape (part lengths)
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # cache_spec is a list of source indices: the descramble is a pure
            # character permutation/selection, so it can be replayed directly
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function once on a string of distinct known chars to
            # record which input index ends up at each output position
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1914
    def _print_sig_code(self, func, example_sig):
        """
        Print a compact Python expression equivalent to the extracted
        signature function (for the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            # Compress the index list into slice expressions where consecutive
            # indices form runs with step +1/-1, single indexing otherwise
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    # run ended: emit the accumulated slice
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # flush the final element or pending run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Record the permutation by running func on a string of distinct chars
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1953
    def _parse_sig_js(self, jscode):
        """
        Find the signature-descrambling function in player JS and return a
        callable wrapping it via JSInterpreter. Patterns are tried in order,
        newest player layouts first; the trailing group is obsolete layouts.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its argument as a one-element list
        return lambda s: initial_function([s])
1977
1978 def _decrypt_signature(self, s, video_id, player_url):
1979 """Turn the encrypted s field into a working signature"""
1980
1981 if player_url is None:
1982 raise ExtractorError('Cannot decrypt signature without player_url')
1983
1984 try:
1985 player_id = (player_url, self._signature_cache_id(s))
1986 if player_id not in self._player_cache:
1987 func = self._extract_signature_function(
1988 video_id, player_url, s
1989 )
1990 self._player_cache[player_id] = func
1991 func = self._player_cache[player_id]
1992 if self.get_param('youtube_print_sig_code'):
1993 self._print_sig_code(func, s)
1994 return func(s)
1995 except Exception as e:
1996 tb = traceback.format_exc()
1997 raise ExtractorError(
1998 'Signature extraction failed: ' + tb, cause=e)
1999
2000 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2001 """
2002 Extract signatureTimestamp (sts)
2003 Required to tell API what sig/player version is in use.
2004 """
2005 sts = None
2006 if isinstance(ytcfg, dict):
2007 sts = int_or_none(ytcfg.get('STS'))
2008
2009 if not sts:
2010 # Attempt to extract from player
2011 if player_url is None:
2012 error_msg = 'Cannot extract signature timestamp without player_url.'
2013 if fatal:
2014 raise ExtractorError(error_msg)
2015 self.report_warning(error_msg)
2016 return
2017 if self._load_player(video_id, player_url, fatal=fatal):
2018 player_id = self._extract_player_info(player_url)
2019 code = self._code_cache[player_id]
2020 sts = int_or_none(self._search_regex(
2021 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2022 'JS player signature timestamp', group='sts', fatal=fatal))
2023 return sts
2024
2025 def _mark_watched(self, video_id, player_responses):
2026 playback_url = traverse_obj(
2027 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2028 expected_type=url_or_none, get_all=False)
2029 if not playback_url:
2030 self.report_warning('Unable to mark watched')
2031 return
2032 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2033 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2034
2035 # cpn generation algorithm is reverse engineered from base.js.
2036 # In fact it works even with dummy cpn.
2037 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2038 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2039
2040 qs.update({
2041 'ver': ['2'],
2042 'cpn': [cpn],
2043 })
2044 playback_url = compat_urlparse.urlunparse(
2045 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2046
2047 self._download_webpage(
2048 playback_url, video_id, 'Marking watched',
2049 'Unable to mark watched', fatal=False)
2050
    @staticmethod
    def _extract_urls(webpage):
        """Return all YouTube video URLs/ids embedded in *webpage* (iframes, embeds, plugins)."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        # NOTE: this one yields bare video ids, not full URLs
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
2082
2083 @staticmethod
2084 def _extract_url(webpage):
2085 urls = YoutubeIE._extract_urls(webpage)
2086 return urls[0] if urls else None
2087
2088 @classmethod
2089 def extract_id(cls, url):
2090 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2091 if mobj is None:
2092 raise ExtractorError('Invalid URL: %s' % url)
2093 video_id = mobj.group(2)
2094 return video_id
2095
2096 def _extract_chapters_from_json(self, data, duration):
2097 chapter_list = traverse_obj(
2098 data, (
2099 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2100 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2101 ), expected_type=list)
2102
2103 return self._extract_chapters(
2104 chapter_list,
2105 chapter_time=lambda chapter: float_or_none(
2106 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2107 chapter_title=lambda chapter: traverse_obj(
2108 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2109 duration=duration)
2110
2111 def _extract_chapters_from_engagement_panel(self, data, duration):
2112 content_list = traverse_obj(
2113 data,
2114 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2115 expected_type=list, default=[])
2116 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2117 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2118
2119 return next((
2120 filter(None, (
2121 self._extract_chapters(
2122 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2123 chapter_time, chapter_title, duration)
2124 for contents in content_list
2125 ))), [])
2126
2127 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2128 chapters = []
2129 last_chapter = {'start_time': 0}
2130 for idx, chapter in enumerate(chapter_list or []):
2131 title = chapter_title(chapter)
2132 start_time = chapter_time(chapter)
2133 if start_time is None:
2134 continue
2135 last_chapter['end_time'] = start_time
2136 if start_time < last_chapter['start_time']:
2137 if idx == 1:
2138 chapters.pop()
2139 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2140 else:
2141 self.report_warning(f'Invalid start time for chapter "{title}"')
2142 continue
2143 last_chapter = {'start_time': start_time, 'title': title}
2144 chapters.append(last_chapter)
2145 last_chapter['end_time'] = duration
2146 return chapters
2147
2148 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2149 return self._parse_json(self._search_regex(
2150 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2151 regex), webpage, name, default='{}'), video_id, fatal=False)
2152
2153 @staticmethod
2154 def parse_time_text(time_text):
2155 """
2156 Parse the comment time text
2157 time_text is in the format 'X units ago (edited)'
2158 """
2159 time_text_split = time_text.split(' ')
2160 if len(time_text_split) >= 3:
2161 try:
2162 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2163 except ValueError:
2164 return None
2165
2166 def _extract_comment(self, comment_renderer, parent=None):
2167 comment_id = comment_renderer.get('commentId')
2168 if not comment_id:
2169 return
2170
2171 text = self._get_text(comment_renderer, 'contentText')
2172
2173 # note: timestamp is an estimate calculated from the current time and time_text
2174 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2175 time_text_dt = self.parse_time_text(time_text)
2176 if isinstance(time_text_dt, datetime.datetime):
2177 timestamp = calendar.timegm(time_text_dt.timetuple())
2178 author = self._get_text(comment_renderer, 'authorText')
2179 author_id = try_get(comment_renderer,
2180 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2181
2182 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2183 lambda x: x['likeCount']), compat_str)) or 0
2184 author_thumbnail = try_get(comment_renderer,
2185 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2186
2187 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2188 is_favorited = 'creatorHeart' in (try_get(
2189 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2190 return {
2191 'id': comment_id,
2192 'text': text,
2193 'timestamp': timestamp,
2194 'time_text': time_text,
2195 'like_count': votes,
2196 'is_favorited': is_favorited,
2197 'author': author,
2198 'author_id': author_id,
2199 'author_thumbnail': author_thumbnail,
2200 'author_is_uploader': author_is_uploader,
2201 'parent': parent or 'root'
2202 }
2203
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """
        Generator over a comment (or reply) continuation.

        Yields an int (the estimated total comment count, parsed from the
        section header) once at most, then comment info dicts. Recurses into
        reply threads with parent set; comment_counts is a shared mutable
        list [downloaded so far, estimated total, current reply thread #].
        """

        def extract_header(contents):
            # Parse the comments section header: total count + the
            # continuation matching the requested sort order.
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield the comments in *contents*, recursing into each reply
            # thread (depth is capped by YouTube itself at 2).
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        # Tokens shorter than 27 chars come from the old API; synthesize a
        # token the new API understands from the video id instead.
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the continuation chain until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData forward so subsequent pages share the session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            # Current response structure: a list of endpoint commands
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2374
2375 @staticmethod
2376 def _generate_comment_continuation(video_id):
2377 """
2378 Generates initial comment section continuation token from given video id
2379 """
2380 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2381 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2382 new_continuation_intlist = list(itertools.chain.from_iterable(
2383 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2384 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2385
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Walk the top-level renderers; only the first matching
            # itemSectionRenderer per entry is used (note the break)
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        # deepcopy so the caller's ytcfg is not mutated
        # NOTE(review): if INNERTUBE_CONTEXT.client is absent, 'hl' is set on
        # the throwaway default dict, i.e. silently does nothing - confirm
        ytcfg = copy.deepcopy(ytcfg)
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            # _comment_entries yields the estimated total (an int) before the
            # comment dicts; separate the two here
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Allow the user to abort and still keep what was downloaded
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2423
2424 @staticmethod
2425 def _generate_player_context(sts=None):
2426 context = {
2427 'html5Preference': 'HTML5_PREF_WANTS',
2428 }
2429 if sts is not None:
2430 context['signatureTimestamp'] = sts
2431 return {
2432 'playbackContext': {
2433 'contentPlaybackContext': context
2434 },
2435 'contentCheckOk': True,
2436 'racyCheckOk': True
2437 }
2438
2439 def _is_agegated(self, player_response):
2440 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2441 for reason in reasons:
2442 if reason in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS:
2443 return True
2444 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')) is not None:
2445 return True
2446 return False
2447
2448 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2449
2450 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2451 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2452 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2453 headers = self.generate_api_headers(
2454 player_ytcfg, identity_token, syncid,
2455 default_client=self._YT_CLIENTS[client], session_index=session_index)
2456
2457 yt_query = {'videoId': video_id}
2458 yt_query.update(self._generate_player_context(sts))
2459 return self._extract_response(
2460 item_id=video_id, ep='player', query=yt_query,
2461 ytcfg=player_ytcfg, headers=headers, fatal=False,
2462 default_client=self._YT_CLIENTS[client],
2463 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2464 ) or None
2465
2466 def _get_requested_clients(self, url, smuggled_data):
2467 requested_clients = []
2468 allowed_clients = [client for client in self._YT_CLIENTS.keys() if client[:1] != '_']
2469 for client in self._configuration_arg('player_client'):
2470 if client in allowed_clients:
2471 requested_clients.append(client)
2472 elif client == 'all':
2473 requested_clients.extend(allowed_clients)
2474 else:
2475 self.report_warning(f'Skipping unsupported client {client}')
2476 if not requested_clients:
2477 requested_clients = ['android', 'web']
2478
2479 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2480 requested_clients.extend(
2481 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2482
2483 return orderedSet(requested_clients)
2484
2485 def _extract_player_ytcfg(self, client, video_id):
2486 url = {
2487 'web_music': 'https://music.youtube.com',
2488 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2489 }.get(client)
2490 if not url:
2491 return {}
2492 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2493 return self.extract_ytcfg(video_id, webpage) or {}
2494
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """
        Yield a player response per requested client, queueing the matching
        *_agegate client whenever a response is detected as age-gated.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        # Process clients in requested order; reversed so pop() takes from
        # the front while agegate fallbacks can be appended mid-loop
        original_clients = clients
        clients = clients[::-1]
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # The web client's response is already embedded in the page
            pr = (
                initial_pr if client == 'web' and initial_pr
                else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
            if pr:
                yield pr

            # Retry age-gated videos once per client via its *_agegate variant
            if self._is_agegated(pr):
                client = f'{client}_agegate'
                if client in self._YT_CLIENTS and client not in original_clients:
                    clients.append(client)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr and 'web' not in original_clients:
            initial_pr['streamingData'] = None
            yield initial_pr
2529
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        First processes progressive/adaptive formats (including
        signature-cipher decryption via the player JS), then formats from
        HLS and DASH manifests, deduplicating by itag/stream id across
        clients.
        """
        itags, stream_ids = [], []
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip formats that declare DRM or a segment target duration
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # No direct URL: the stream is protected by a signature cipher
                # which must be decrypted using the player JS
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                # NOTE(review): if 'qualityLabel' is absent and quality is
                # still None here, .replace would raise — presumably the
                # player response always sets one of them; confirm
                'format_note': ', '.join(filter(None, (
                    audio_track.get('displayName'),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Map a manifest-derived format back to a known quality via its
            # itag or height; -1 when neither is known
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2664
2665 def _real_extract(self, url):
2666 url, smuggled_data = unsmuggle_url(url, {})
2667 video_id = self._match_id(url)
2668
2669 base_url = self.http_scheme() + '//www.youtube.com/'
2670 webpage_url = base_url + 'watch?v=' + video_id
2671 webpage = self._download_webpage(
2672 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2673
2674 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2675 player_url = self._extract_player_url(master_ytcfg, webpage)
2676 identity_token = self._extract_identity_token(webpage, video_id)
2677
2678 player_responses = list(self._extract_player_responses(
2679 self._get_requested_clients(url, smuggled_data),
2680 video_id, webpage, master_ytcfg, player_url, identity_token))
2681
2682 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2683
2684 playability_statuses = traverse_obj(
2685 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2686
2687 trailer_video_id = get_first(
2688 playability_statuses,
2689 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2690 expected_type=str)
2691 if trailer_video_id:
2692 return self.url_result(
2693 trailer_video_id, self.ie_key(), trailer_video_id)
2694
2695 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2696 if webpage else (lambda x: None))
2697
2698 video_details = traverse_obj(
2699 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2700 microformats = traverse_obj(
2701 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2702 expected_type=dict, default=[])
2703 video_title = (
2704 get_first(video_details, 'title')
2705 or self._get_text(microformats, (..., 'title'))
2706 or search_meta(['og:title', 'twitter:title', 'title']))
2707 video_description = get_first(video_details, 'shortDescription')
2708
2709 if not smuggled_data.get('force_singlefeed', False):
2710 if not self.get_param('noplaylist'):
2711 multifeed_metadata_list = get_first(
2712 player_responses,
2713 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2714 expected_type=str)
2715 if multifeed_metadata_list:
2716 entries = []
2717 feed_ids = []
2718 for feed in multifeed_metadata_list.split(','):
2719 # Unquote should take place before split on comma (,) since textual
2720 # fields may contain comma as well (see
2721 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2722 feed_data = compat_parse_qs(
2723 compat_urllib_parse_unquote_plus(feed))
2724
2725 def feed_entry(name):
2726 return try_get(
2727 feed_data, lambda x: x[name][0], compat_str)
2728
2729 feed_id = feed_entry('id')
2730 if not feed_id:
2731 continue
2732 feed_title = feed_entry('title')
2733 title = video_title
2734 if feed_title:
2735 title += ' (%s)' % feed_title
2736 entries.append({
2737 '_type': 'url_transparent',
2738 'ie_key': 'Youtube',
2739 'url': smuggle_url(
2740 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2741 {'force_singlefeed': True}),
2742 'title': title,
2743 })
2744 feed_ids.append(feed_id)
2745 self.to_screen(
2746 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2747 % (', '.join(feed_ids), video_id))
2748 return self.playlist_result(
2749 entries, video_id, video_title, video_description)
2750 else:
2751 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2752
2753 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2754 is_live = get_first(video_details, 'isLive')
2755 if is_live is None:
2756 is_live = get_first(live_broadcast_details, 'isLiveNow')
2757
2758 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2759 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2760
2761 if not formats:
2762 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2763 self.raise_no_formats(
2764 'This video is DRM protected.', expected=True)
2765 pemr = get_first(
2766 playability_statuses,
2767 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2768 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2769 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2770 if subreason:
2771 if subreason == 'The uploader has not made this video available in your country.':
2772 countries = get_first(microformats, 'availableCountries')
2773 if not countries:
2774 regions_allowed = search_meta('regionsAllowed')
2775 countries = regions_allowed.split(',') if regions_allowed else None
2776 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2777 reason += f'. {subreason}'
2778 if reason:
2779 self.raise_no_formats(reason, expected=True)
2780
2781 for f in formats:
2782 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2783 f['source_preference'] = -10
2784 note = f.get('format_note')
2785 f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2786
2787 # Source is given priority since formats that throttle are given lower source_preference
2788 # When throttling issue is fully fixed, remove this
2789 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
2790
2791 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2792 if not keywords and webpage:
2793 keywords = [
2794 unescapeHTML(m.group('content'))
2795 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2796 for keyword in keywords:
2797 if keyword.startswith('yt:stretch='):
2798 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2799 if mobj:
2800 # NB: float is intentional for forcing float division
2801 w, h = (float(v) for v in mobj.groups())
2802 if w > 0 and h > 0:
2803 ratio = w / h
2804 for f in formats:
2805 if f.get('vcodec') != 'none':
2806 f['stretched_ratio'] = ratio
2807 break
2808
2809 thumbnails = []
2810 thumbnail_dicts = traverse_obj(
2811 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2812 expected_type=dict, default=[])
2813 for thumbnail in thumbnail_dicts:
2814 thumbnail_url = thumbnail.get('url')
2815 if not thumbnail_url:
2816 continue
2817 # Sometimes youtube gives a wrong thumbnail URL. See:
2818 # https://github.com/yt-dlp/yt-dlp/issues/233
2819 # https://github.com/ytdl-org/youtube-dl/issues/28023
2820 if 'maxresdefault' in thumbnail_url:
2821 thumbnail_url = thumbnail_url.split('?')[0]
2822 thumbnails.append({
2823 'url': thumbnail_url,
2824 'height': int_or_none(thumbnail.get('height')),
2825 'width': int_or_none(thumbnail.get('width')),
2826 })
2827 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2828 if thumbnail_url:
2829 thumbnails.append({
2830 'url': thumbnail_url,
2831 })
2832 # The best resolution thumbnails sometimes does not appear in the webpage
2833 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2834 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2835 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2836 # TODO: Test them also? - For some videos, even these don't exist
2837 guaranteed_thumbnail_names = [
2838 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2839 'mqdefault', 'mq1', 'mq2', 'mq3',
2840 'default', '1', '2', '3'
2841 ]
2842 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2843 n_thumbnail_names = len(thumbnail_names)
2844
2845 thumbnails.extend({
2846 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2847 video_id=video_id, name=name, ext=ext,
2848 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2849 '_test_url': name in hq_thumbnail_names,
2850 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2851 for thumb in thumbnails:
2852 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2853 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2854 self._remove_duplicate_formats(thumbnails)
2855
2856 category = get_first(microformats, 'category') or search_meta('genre')
2857 channel_id = str_or_none(
2858 get_first(video_details, 'channelId')
2859 or get_first(microformats, 'externalChannelId')
2860 or search_meta('channelId'))
2861 duration = int_or_none(
2862 get_first(video_details, 'lengthSeconds')
2863 or get_first(microformats, 'lengthSeconds')
2864 or parse_duration(search_meta('duration'))) or None
2865 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2866
2867 live_content = get_first(video_details, 'isLiveContent')
2868 is_upcoming = get_first(video_details, 'isUpcoming')
2869 if is_live is None:
2870 if is_upcoming or live_content is False:
2871 is_live = False
2872 if is_upcoming is None and (live_content or is_live):
2873 is_upcoming = False
2874 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2875 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2876 if not duration and live_endtime and live_starttime:
2877 duration = live_endtime - live_starttime
2878
2879 info = {
2880 'id': video_id,
2881 'title': self._live_title(video_title) if is_live else video_title,
2882 'formats': formats,
2883 'thumbnails': thumbnails,
2884 'description': video_description,
2885 'upload_date': unified_strdate(
2886 get_first(microformats, 'uploadDate')
2887 or search_meta('uploadDate')),
2888 'uploader': get_first(video_details, 'author'),
2889 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2890 'uploader_url': owner_profile_url,
2891 'channel_id': channel_id,
2892 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2893 'duration': duration,
2894 'view_count': int_or_none(
2895 get_first((video_details, microformats), (..., 'viewCount'))
2896 or search_meta('interactionCount')),
2897 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2898 'age_limit': 18 if (
2899 get_first(microformats, 'isFamilySafe') is False
2900 or search_meta('isFamilyFriendly') == 'false'
2901 or search_meta('og:restrictions:age') == '18+') else 0,
2902 'webpage_url': webpage_url,
2903 'categories': [category] if category else None,
2904 'tags': keywords,
2905 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2906 'is_live': is_live,
2907 'was_live': (False if is_live or is_upcoming or live_content is False
2908 else None if is_live is None or is_upcoming is None
2909 else live_content),
2910 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2911 'release_timestamp': live_starttime,
2912 }
2913
2914 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2915 # Converted into dicts to remove duplicates
2916 captions = {
2917 sub.get('baseUrl'): sub
2918 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2919 translation_languages = {
2920 lang.get('languageCode'): lang.get('languageName')
2921 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2922 subtitles = {}
2923 if pctr:
2924 def process_language(container, base_url, lang_code, sub_name, query):
2925 lang_subs = container.setdefault(lang_code, [])
2926 for fmt in self._SUBTITLE_FORMATS:
2927 query.update({
2928 'fmt': fmt,
2929 })
2930 lang_subs.append({
2931 'ext': fmt,
2932 'url': update_url_query(base_url, query),
2933 'name': sub_name,
2934 })
2935
2936 for base_url, caption_track in captions.items():
2937 if not base_url:
2938 continue
2939 if caption_track.get('kind') != 'asr':
2940 lang_code = (
2941 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2942 or caption_track.get('languageCode'))
2943 if not lang_code:
2944 continue
2945 process_language(
2946 subtitles, base_url, lang_code,
2947 traverse_obj(caption_track, ('name', 'simpleText')),
2948 {})
2949 continue
2950 automatic_captions = {}
2951 for trans_code, trans_name in translation_languages.items():
2952 if not trans_code:
2953 continue
2954 process_language(
2955 automatic_captions, base_url, trans_code,
2956 self._get_text(trans_name, max_runs=1),
2957 {'tlang': trans_code})
2958 info['automatic_captions'] = automatic_captions
2959 info['subtitles'] = subtitles
2960
2961 parsed_url = compat_urllib_parse_urlparse(url)
2962 for component in [parsed_url.fragment, parsed_url.query]:
2963 query = compat_parse_qs(component)
2964 for k, v in query.items():
2965 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2966 d_k += '_time'
2967 if d_k not in info and k in s_ks:
2968 info[d_k] = parse_duration(query[k][0])
2969
2970 # Youtube Music Auto-generated description
2971 if video_description:
2972 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2973 if mobj:
2974 release_year = mobj.group('release_year')
2975 release_date = mobj.group('release_date')
2976 if release_date:
2977 release_date = release_date.replace('-', '')
2978 if not release_year:
2979 release_year = release_date[:4]
2980 info.update({
2981 'album': mobj.group('album'.strip()),
2982 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2983 'track': mobj.group('track').strip(),
2984 'release_date': release_date,
2985 'release_year': int_or_none(release_year),
2986 })
2987
2988 initial_data = None
2989 if webpage:
2990 initial_data = self._extract_yt_initial_variable(
2991 webpage, self._YT_INITIAL_DATA_RE, video_id,
2992 'yt initial data')
2993 if not initial_data:
2994 headers = self.generate_api_headers(
2995 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2996 session_index=self._extract_session_index(master_ytcfg))
2997
2998 initial_data = self._extract_response(
2999 item_id=video_id, ep='next', fatal=False,
3000 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3001 note='Downloading initial data API JSON')
3002
3003 try:
3004 # This will error if there is no livechat
3005 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3006 info['subtitles']['live_chat'] = [{
3007 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3008 'video_id': video_id,
3009 'ext': 'json',
3010 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3011 }]
3012 except (KeyError, IndexError, TypeError):
3013 pass
3014
3015 if initial_data:
3016 info['chapters'] = (
3017 self._extract_chapters_from_json(initial_data, duration)
3018 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3019 or None)
3020
3021 contents = try_get(
3022 initial_data,
3023 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3024 list) or []
3025 for content in contents:
3026 vpir = content.get('videoPrimaryInfoRenderer')
3027 if vpir:
3028 stl = vpir.get('superTitleLink')
3029 if stl:
3030 stl = self._get_text(stl)
3031 if try_get(
3032 vpir,
3033 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3034 info['location'] = stl
3035 else:
3036 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3037 if mobj:
3038 info.update({
3039 'series': mobj.group(1),
3040 'season_number': int(mobj.group(2)),
3041 'episode_number': int(mobj.group(3)),
3042 })
3043 for tlb in (try_get(
3044 vpir,
3045 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3046 list) or []):
3047 tbr = tlb.get('toggleButtonRenderer') or {}
3048 for getter, regex in [(
3049 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3050 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3051 lambda x: x['accessibility'],
3052 lambda x: x['accessibilityData']['accessibilityData'],
3053 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3054 label = (try_get(tbr, getter, dict) or {}).get('label')
3055 if label:
3056 mobj = re.match(regex, label)
3057 if mobj:
3058 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3059 break
3060 sbr_tooltip = try_get(
3061 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3062 if sbr_tooltip:
3063 like_count, dislike_count = sbr_tooltip.split(' / ')
3064 info.update({
3065 'like_count': str_to_int(like_count),
3066 'dislike_count': str_to_int(dislike_count),
3067 })
3068 vsir = content.get('videoSecondaryInfoRenderer')
3069 if vsir:
3070 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3071 rows = try_get(
3072 vsir,
3073 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3074 list) or []
3075 multiple_songs = False
3076 for row in rows:
3077 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3078 multiple_songs = True
3079 break
3080 for row in rows:
3081 mrr = row.get('metadataRowRenderer') or {}
3082 mrr_title = mrr.get('title')
3083 if not mrr_title:
3084 continue
3085 mrr_title = self._get_text(mrr, 'title')
3086 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3087 if mrr_title == 'License':
3088 info['license'] = mrr_contents_text
3089 elif not multiple_songs:
3090 if mrr_title == 'Album':
3091 info['album'] = mrr_contents_text
3092 elif mrr_title == 'Artist':
3093 info['artist'] = mrr_contents_text
3094 elif mrr_title == 'Song':
3095 info['track'] = mrr_contents_text
3096
3097 fallbacks = {
3098 'channel': 'uploader',
3099 'channel_id': 'uploader_id',
3100 'channel_url': 'uploader_url',
3101 }
3102 for to, frm in fallbacks.items():
3103 if not info.get(to):
3104 info[to] = info.get(frm)
3105
3106 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3107 v = info.get(s_k)
3108 if v:
3109 info[d_k] = v
3110
3111 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3112 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3113 is_membersonly = None
3114 is_premium = None
3115 if initial_data and is_private is not None:
3116 is_membersonly = False
3117 is_premium = False
3118 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3119 badge_labels = set()
3120 for content in contents:
3121 if not isinstance(content, dict):
3122 continue
3123 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3124 for badge_label in badge_labels:
3125 if badge_label.lower() == 'members only':
3126 is_membersonly = True
3127 elif badge_label.lower() == 'premium':
3128 is_premium = True
3129 elif badge_label.lower() == 'unlisted':
3130 is_unlisted = True
3131
3132 info['availability'] = self._availability(
3133 is_private=is_private,
3134 needs_premium=is_premium,
3135 needs_subscription=is_membersonly,
3136 needs_auth=info['age_limit'] >= 18,
3137 is_unlisted=None if is_private is None else is_unlisted)
3138
3139 # get xsrf for annotations or comments
3140 get_annotations = self.get_param('writeannotations', False)
3141 get_comments = self.get_param('getcomments', False)
3142 if get_annotations or get_comments:
3143 xsrf_token = None
3144 if master_ytcfg:
3145 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3146 if not xsrf_token:
3147 xsrf_token = self._search_regex(
3148 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3149 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3150
3151 # annotations
3152 if get_annotations:
3153 invideo_url = get_first(
3154 player_responses,
3155 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3156 expected_type=str)
3157 if xsrf_token and invideo_url:
3158 xsrf_field_name = None
3159 if master_ytcfg:
3160 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3161 if not xsrf_field_name:
3162 xsrf_field_name = self._search_regex(
3163 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3164 webpage, 'xsrf field name',
3165 group='xsrf_field_name', default='session_token')
3166 info['annotations'] = self._download_webpage(
3167 self._proto_relative_url(invideo_url),
3168 video_id, note='Downloading annotations',
3169 errnote='Unable to download video annotations', fatal=False,
3170 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3171
3172 if get_comments:
3173 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3174
3175 self.mark_watched(video_id, player_responses)
3176
3177 return info
3178
3179
3180 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3181 IE_DESC = 'YouTube.com tab'
3182 _VALID_URL = r'''(?x)
3183 https?://
3184 (?:\w+\.)?
3185 (?:
3186 youtube(?:kids)?\.com|
3187 invidio\.us
3188 )/
3189 (?:
3190 (?P<channel_type>channel|c|user|browse)/|
3191 (?P<not_channel>
3192 feed/|hashtag/|
3193 (?:playlist|watch)\?.*?\blist=
3194 )|
3195 (?!(?:%s)\b) # Direct URLs
3196 )
3197 (?P<id>[^/?\#&]+)
3198 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3199 IE_NAME = 'youtube:tab'
3200
3201 _TESTS = [{
3202 'note': 'playlists, multipage',
3203 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3204 'playlist_mincount': 94,
3205 'info_dict': {
3206 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3207 'title': 'Игорь Клейнер - Playlists',
3208 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3209 'uploader': 'Игорь Клейнер',
3210 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3211 },
3212 }, {
3213 'note': 'playlists, multipage, different order',
3214 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3215 'playlist_mincount': 94,
3216 'info_dict': {
3217 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3218 'title': 'Игорь Клейнер - Playlists',
3219 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3220 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3221 'uploader': 'Игорь Клейнер',
3222 },
3223 }, {
3224 'note': 'playlists, series',
3225 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3226 'playlist_mincount': 5,
3227 'info_dict': {
3228 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3229 'title': '3Blue1Brown - Playlists',
3230 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3231 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3232 'uploader': '3Blue1Brown',
3233 },
3234 }, {
3235 'note': 'playlists, singlepage',
3236 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3237 'playlist_mincount': 4,
3238 'info_dict': {
3239 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3240 'title': 'ThirstForScience - Playlists',
3241 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3242 'uploader': 'ThirstForScience',
3243 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3244 }
3245 }, {
3246 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3247 'only_matching': True,
3248 }, {
3249 'note': 'basic, single video playlist',
3250 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3251 'info_dict': {
3252 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3253 'uploader': 'Sergey M.',
3254 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3255 'title': 'youtube-dl public playlist',
3256 },
3257 'playlist_count': 1,
3258 }, {
3259 'note': 'empty playlist',
3260 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3261 'info_dict': {
3262 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3263 'uploader': 'Sergey M.',
3264 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3265 'title': 'youtube-dl empty playlist',
3266 },
3267 'playlist_count': 0,
3268 }, {
3269 'note': 'Home tab',
3270 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3271 'info_dict': {
3272 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3273 'title': 'lex will - Home',
3274 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3275 'uploader': 'lex will',
3276 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3277 },
3278 'playlist_mincount': 2,
3279 }, {
3280 'note': 'Videos tab',
3281 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3282 'info_dict': {
3283 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3284 'title': 'lex will - Videos',
3285 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3286 'uploader': 'lex will',
3287 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3288 },
3289 'playlist_mincount': 975,
3290 }, {
3291 'note': 'Videos tab, sorted by popular',
3292 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3293 'info_dict': {
3294 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3295 'title': 'lex will - Videos',
3296 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3297 'uploader': 'lex will',
3298 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3299 },
3300 'playlist_mincount': 199,
3301 }, {
3302 'note': 'Playlists tab',
3303 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3304 'info_dict': {
3305 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3306 'title': 'lex will - Playlists',
3307 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3308 'uploader': 'lex will',
3309 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3310 },
3311 'playlist_mincount': 17,
3312 }, {
3313 'note': 'Community tab',
3314 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3315 'info_dict': {
3316 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3317 'title': 'lex will - Community',
3318 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3319 'uploader': 'lex will',
3320 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3321 },
3322 'playlist_mincount': 18,
3323 }, {
3324 'note': 'Channels tab',
3325 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3326 'info_dict': {
3327 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3328 'title': 'lex will - Channels',
3329 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3330 'uploader': 'lex will',
3331 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3332 },
3333 'playlist_mincount': 12,
3334 }, {
3335 'note': 'Search tab',
3336 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3337 'playlist_mincount': 40,
3338 'info_dict': {
3339 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3340 'title': '3Blue1Brown - Search - linear algebra',
3341 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3342 'uploader': '3Blue1Brown',
3343 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3344 },
3345 }, {
3346 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3347 'only_matching': True,
3348 }, {
3349 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3350 'only_matching': True,
3351 }, {
3352 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3353 'only_matching': True,
3354 }, {
3355 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3356 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3357 'info_dict': {
3358 'title': '29C3: Not my department',
3359 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3360 'uploader': 'Christiaan008',
3361 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3362 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3363 },
3364 'playlist_count': 96,
3365 }, {
3366 'note': 'Large playlist',
3367 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3368 'info_dict': {
3369 'title': 'Uploads from Cauchemar',
3370 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3371 'uploader': 'Cauchemar',
3372 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3373 },
3374 'playlist_mincount': 1123,
3375 }, {
3376 'note': 'even larger playlist, 8832 videos',
3377 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3378 'only_matching': True,
3379 }, {
3380 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3381 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3382 'info_dict': {
3383 'title': 'Uploads from Interstellar Movie',
3384 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3385 'uploader': 'Interstellar Movie',
3386 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3387 },
3388 'playlist_mincount': 21,
3389 }, {
3390 'note': 'Playlist with "show unavailable videos" button',
3391 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3392 'info_dict': {
3393 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3394 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3395 'uploader': 'Phim Siêu Nhân Nhật Bản',
3396 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3397 },
3398 'playlist_mincount': 200,
3399 }, {
3400 'note': 'Playlist with unavailable videos in page 7',
3401 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3402 'info_dict': {
3403 'title': 'Uploads from BlankTV',
3404 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3405 'uploader': 'BlankTV',
3406 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3407 },
3408 'playlist_mincount': 1000,
3409 }, {
3410 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3411 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3412 'info_dict': {
3413 'title': 'Data Analysis with Dr Mike Pound',
3414 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3415 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3416 'uploader': 'Computerphile',
3417 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3418 },
3419 'playlist_mincount': 11,
3420 }, {
3421 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3422 'only_matching': True,
3423 }, {
3424 'note': 'Playlist URL that does not actually serve a playlist',
3425 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3426 'info_dict': {
3427 'id': 'FqZTN594JQw',
3428 'ext': 'webm',
3429 'title': "Smiley's People 01 detective, Adventure Series, Action",
3430 'uploader': 'STREEM',
3431 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3433 'upload_date': '20150526',
3434 'license': 'Standard YouTube License',
3435 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3436 'categories': ['People & Blogs'],
3437 'tags': list,
3438 'view_count': int,
3439 'like_count': int,
3440 'dislike_count': int,
3441 },
3442 'params': {
3443 'skip_download': True,
3444 },
3445 'skip': 'This video is not available.',
3446 'add_ie': [YoutubeIE.ie_key()],
3447 }, {
3448 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3449 'only_matching': True,
3450 }, {
3451 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3452 'only_matching': True,
3453 }, {
3454 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3455 'info_dict': {
3456 'id': 'FMtPN8yp5LU', # This will keep changing
3457 'ext': 'mp4',
3458 'title': compat_str,
3459 'uploader': 'Sky News',
3460 'uploader_id': 'skynews',
3461 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3462 'upload_date': r're:\d{8}',
3463 'description': compat_str,
3464 'categories': ['News & Politics'],
3465 'tags': list,
3466 'like_count': int,
3467 'dislike_count': int,
3468 },
3469 'params': {
3470 'skip_download': True,
3471 },
3472 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3473 }, {
3474 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3475 'info_dict': {
3476 'id': 'a48o2S1cPoo',
3477 'ext': 'mp4',
3478 'title': 'The Young Turks - Live Main Show',
3479 'uploader': 'The Young Turks',
3480 'uploader_id': 'TheYoungTurks',
3481 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3482 'upload_date': '20150715',
3483 'license': 'Standard YouTube License',
3484 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3485 'categories': ['News & Politics'],
3486 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3487 'like_count': int,
3488 'dislike_count': int,
3489 },
3490 'params': {
3491 'skip_download': True,
3492 },
3493 'only_matching': True,
3494 }, {
3495 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3496 'only_matching': True,
3497 }, {
3498 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3499 'only_matching': True,
3500 }, {
3501 'note': 'A channel that is not live. Should raise error',
3502 'url': 'https://www.youtube.com/user/numberphile/live',
3503 'only_matching': True,
3504 }, {
3505 'url': 'https://www.youtube.com/feed/trending',
3506 'only_matching': True,
3507 }, {
3508 'url': 'https://www.youtube.com/feed/library',
3509 'only_matching': True,
3510 }, {
3511 'url': 'https://www.youtube.com/feed/history',
3512 'only_matching': True,
3513 }, {
3514 'url': 'https://www.youtube.com/feed/subscriptions',
3515 'only_matching': True,
3516 }, {
3517 'url': 'https://www.youtube.com/feed/watch_later',
3518 'only_matching': True,
3519 }, {
3520 'note': 'Recommended - redirects to home page',
3521 'url': 'https://www.youtube.com/feed/recommended',
3522 'only_matching': True,
3523 }, {
3524 'note': 'inline playlist with not always working continuations',
3525 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3529 'only_matching': True,
3530 }, {
3531 'url': 'https://www.youtube.com/course',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/zsecurity',
3535 'only_matching': True,
3536 }, {
3537 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/TheYoungTurks/live',
3541 'only_matching': True,
3542 }, {
3543 'url': 'https://www.youtube.com/hashtag/cctv9',
3544 'info_dict': {
3545 'id': 'cctv9',
3546 'title': '#cctv9',
3547 },
3548 'playlist_mincount': 350,
3549 }, {
3550 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3551 'only_matching': True,
3552 }, {
3553 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3554 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3555 'only_matching': True
3556 }, {
3557 'note': '/browse/ should redirect to /channel/',
3558 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3559 'only_matching': True
3560 }, {
3561 'note': 'VLPL, should redirect to playlist?list=PL...',
3562 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3563 'info_dict': {
3564 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3565 'uploader': 'NoCopyrightSounds',
3566 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3567 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3568 'title': 'NCS Releases',
3569 },
3570 'playlist_mincount': 166,
3571 }, {
3572 'note': 'Topic, should redirect to playlist?list=UU...',
3573 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3574 'info_dict': {
3575 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3576 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3577 'title': 'Uploads from Royalty Free Music - Topic',
3578 'uploader': 'Royalty Free Music - Topic',
3579 },
3580 'expected_warnings': [
3581 'A channel/user page was given',
3582 'The URL does not have a videos tab',
3583 ],
3584 'playlist_mincount': 101,
3585 }, {
3586 'note': 'Topic without a UU playlist',
3587 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3588 'info_dict': {
3589 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3590 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3591 },
3592 'expected_warnings': [
3593 'A channel/user page was given',
3594 'The URL does not have a videos tab',
3595 'Falling back to channel URL',
3596 ],
3597 'playlist_mincount': 9,
3598 }, {
3599 'note': 'Youtube music Album',
3600 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3601 'info_dict': {
3602 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3603 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3604 },
3605 'playlist_count': 50,
3606 }, {
3607 'note': 'unlisted single video playlist',
3608 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3609 'info_dict': {
3610 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3611 'uploader': 'colethedj',
3612 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3613 'title': 'yt-dlp unlisted playlist test',
3614 'availability': 'unlisted'
3615 },
3616 'playlist_count': 1,
3617 }]
3618
3619 @classmethod
3620 def suitable(cls, url):
3621 return False if YoutubeIE.suitable(url) else super(
3622 YoutubeTabIE, cls).suitable(url)
3623
3624 def _extract_channel_id(self, webpage):
3625 channel_id = self._html_search_meta(
3626 'channelId', webpage, 'channel id', default=None)
3627 if channel_id:
3628 return channel_id
3629 channel_url = self._html_search_meta(
3630 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3631 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3632 'twitter:app:url:googleplay'), webpage, 'channel url')
3633 return self._search_regex(
3634 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3635 channel_url, 'channel id')
3636
3637 @staticmethod
3638 def _extract_basic_item_renderer(item):
3639 # Modified from _extract_grid_item_renderer
3640 known_basic_renderers = (
3641 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3642 )
3643 for key, renderer in item.items():
3644 if not isinstance(renderer, dict):
3645 continue
3646 elif key in known_basic_renderers:
3647 return renderer
3648 elif key.startswith('grid') and key.endswith('Renderer'):
3649 return renderer
3650
3651 def _grid_entries(self, grid_renderer):
3652 for item in grid_renderer['items']:
3653 if not isinstance(item, dict):
3654 continue
3655 renderer = self._extract_basic_item_renderer(item)
3656 if not isinstance(renderer, dict):
3657 continue
3658 title = self._get_text(renderer, 'title')
3659
3660 # playlist
3661 playlist_id = renderer.get('playlistId')
3662 if playlist_id:
3663 yield self.url_result(
3664 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3665 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3666 video_title=title)
3667 continue
3668 # video
3669 video_id = renderer.get('videoId')
3670 if video_id:
3671 yield self._extract_video(renderer)
3672 continue
3673 # channel
3674 channel_id = renderer.get('channelId')
3675 if channel_id:
3676 yield self.url_result(
3677 'https://www.youtube.com/channel/%s' % channel_id,
3678 ie=YoutubeTabIE.ie_key(), video_title=title)
3679 continue
3680 # generic endpoint URL support
3681 ep_url = urljoin('https://www.youtube.com/', try_get(
3682 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3683 compat_str))
3684 if ep_url:
3685 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3686 if ie.suitable(ep_url):
3687 yield self.url_result(
3688 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3689 break
3690
3691 def _shelf_entries_from_content(self, shelf_renderer):
3692 content = shelf_renderer.get('content')
3693 if not isinstance(content, dict):
3694 return
3695 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3696 if renderer:
3697 # TODO: add support for nested playlists so each shelf is processed
3698 # as separate playlist
3699 # TODO: this includes only first N items
3700 for entry in self._grid_entries(renderer):
3701 yield entry
3702 renderer = content.get('horizontalListRenderer')
3703 if renderer:
3704 # TODO
3705 pass
3706
3707 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3708 ep = try_get(
3709 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3710 compat_str)
3711 shelf_url = urljoin('https://www.youtube.com', ep)
3712 if shelf_url:
3713 # Skipping links to another channels, note that checking for
3714 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3715 # will not work
3716 if skip_channels and '/channels?' in shelf_url:
3717 return
3718 title = self._get_text(shelf_renderer, 'title')
3719 yield self.url_result(shelf_url, video_title=title)
3720 # Shelf may not contain shelf URL, fallback to extraction from content
3721 for entry in self._shelf_entries_from_content(shelf_renderer):
3722 yield entry
3723
3724 def _playlist_entries(self, video_list_renderer):
3725 for content in video_list_renderer['contents']:
3726 if not isinstance(content, dict):
3727 continue
3728 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3729 if not isinstance(renderer, dict):
3730 continue
3731 video_id = renderer.get('videoId')
3732 if not video_id:
3733 continue
3734 yield self._extract_video(renderer)
3735
3736 def _rich_entries(self, rich_grid_renderer):
3737 renderer = try_get(
3738 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3739 video_id = renderer.get('videoId')
3740 if not video_id:
3741 return
3742 yield self._extract_video(renderer)
3743
3744 def _video_entry(self, video_renderer):
3745 video_id = video_renderer.get('videoId')
3746 if video_id:
3747 return self._extract_video(video_renderer)
3748
3749 def _post_thread_entries(self, post_thread_renderer):
3750 post_renderer = try_get(
3751 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3752 if not post_renderer:
3753 return
3754 # video attachment
3755 video_renderer = try_get(
3756 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3757 video_id = video_renderer.get('videoId')
3758 if video_id:
3759 entry = self._extract_video(video_renderer)
3760 if entry:
3761 yield entry
3762 # playlist attachment
3763 playlist_id = try_get(
3764 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3765 if playlist_id:
3766 yield self.url_result(
3767 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3768 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3769 # inline video links
3770 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3771 for run in runs:
3772 if not isinstance(run, dict):
3773 continue
3774 ep_url = try_get(
3775 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3776 if not ep_url:
3777 continue
3778 if not YoutubeIE.suitable(ep_url):
3779 continue
3780 ep_video_id = YoutubeIE._match_id(ep_url)
3781 if video_id == ep_video_id:
3782 continue
3783 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3784
3785 def _post_thread_continuation_entries(self, post_thread_continuation):
3786 contents = post_thread_continuation.get('contents')
3787 if not isinstance(contents, list):
3788 return
3789 for content in contents:
3790 renderer = content.get('backstagePostThreadRenderer')
3791 if not isinstance(renderer, dict):
3792 continue
3793 for entry in self._post_thread_entries(renderer):
3794 yield entry
3795
3796 r''' # unused
3797 def _rich_grid_entries(self, contents):
3798 for content in contents:
3799 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3800 if video_renderer:
3801 entry = self._video_entry(video_renderer)
3802 if entry:
3803 yield entry
3804 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, following continuations across pages.

        @param tab              selected tab renderer
        @param item_id          id used for logging in API requests
        @param identity_token   token for authenticated requests (may be None)
        @param account_syncid   account syncid for multi-channel accounts (may be None)
        @param ytcfg            parsed ytcfg of the webpage
        """

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # not an item section; handle rich items (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # dispatch table: renderer key -> generator of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        # Python 2 does not support nonlocal; a one-element list emulates it so
        # extract_entries() can report the next continuation back to us
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Follow continuations until YouTube stops returning one
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData forward so subsequent pages stay consistent
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation payloads ('continuationContents')
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation payloads ('onResponseReceived*'): wrap the
            # continuation items so the matching extractor can consume them
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3920
3921 @staticmethod
3922 def _extract_selected_tab(tabs):
3923 for tab in tabs:
3924 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3925 if renderer.get('selected') is True:
3926 return renderer
3927 else:
3928 raise ExtractorError('Unable to find selected tab')
3929
3930 @classmethod
3931 def _extract_uploader(cls, data):
3932 uploader = {}
3933 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3934 owner = try_get(
3935 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3936 if owner:
3937 uploader['uploader'] = owner.get('text')
3938 uploader['uploader_id'] = try_get(
3939 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3940 uploader['uploader_url'] = urljoin(
3941 'https://www.youtube.com/',
3942 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3943 return {k: v for k, v in uploader.items() if v is not None}
3944
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build the playlist result for the selected tab of a channel/playlist page.

        @param item_id  fallback playlist id (usually from the URL)
        @param webpage  downloaded webpage (used for ytcfg/identity token)
        @param data     parsed ytInitialData
        @param tabs     list of tab renderers; exactly one must be selected
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel pages expose channelMetadataRenderer; playlist pages expose
        # playlistMetadataRenderer instead
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar; fall back to the sidebar's video thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages have no metadata renderer; use the hashtag header
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Suffix the tab name, e.g. "channel - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata (playlist page): get uploader from sidebar
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4019
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of a Mix playlist, paging through the 'next' endpoint.

        Stops when a page yields nothing new or when the first video comes
        around again (Mixes appear to loop endlessly).
        """
        first_id = last_id = None
        ytcfg = self.extract_ytcfg(playlist_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Pages overlap: resume right after the last video already yielded
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                # NOTE(review): fallback 'params' value semantics unconfirmed;
                # presumably requests the playlist continuation
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4055
4056 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4057 title = playlist.get('title') or try_get(
4058 data, lambda x: x['titleText']['simpleText'], compat_str)
4059 playlist_id = playlist.get('playlistId') or item_id
4060
4061 # Delegating everything except mix playlists to regular tab-based playlist URL
4062 playlist_url = urljoin(url, try_get(
4063 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4064 compat_str))
4065 if playlist_url and playlist_url != url:
4066 return self.url_result(
4067 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4068 video_title=title)
4069
4070 return self.playlist_result(
4071 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4072 playlist_id=playlist_id, playlist_title=title)
4073
4074 def _extract_availability(self, data):
4075 """
4076 Gets the availability of a given playlist/tab.
4077 Note: Unless YouTube tells us explicitly, we do not assume it is public
4078 @param data: response
4079 """
4080 is_private = is_unlisted = None
4081 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4082 badge_labels = self._extract_badges(renderer)
4083
4084 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4085 privacy_dropdown_entries = try_get(
4086 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4087 for renderer_dict in privacy_dropdown_entries:
4088 is_selected = try_get(
4089 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4090 if not is_selected:
4091 continue
4092 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4093 if label:
4094 badge_labels.add(label.lower())
4095 break
4096
4097 for badge_label in badge_labels:
4098 if badge_label == 'unlisted':
4099 is_unlisted = True
4100 elif badge_label == 'private':
4101 is_private = True
4102 elif badge_label == 'public':
4103 is_unlisted = is_private = False
4104 return self._availability(is_private, False, False, False, is_unlisted)
4105
4106 @staticmethod
4107 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4108 sidebar_renderer = try_get(
4109 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4110 for item in sidebar_renderer:
4111 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4112 if renderer:
4113 return renderer
4114
4115 def _reload_with_unavailable_videos(self, item_id, data, webpage):
4116 """
4117 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4118 """
4119 browse_id = params = None
4120 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4121 if not renderer:
4122 return
4123 menu_renderer = try_get(
4124 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4125 for menu_item in menu_renderer:
4126 if not isinstance(menu_item, dict):
4127 continue
4128 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4129 text = try_get(
4130 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4131 if not text or text.lower() != 'show unavailable videos':
4132 continue
4133 browse_endpoint = try_get(
4134 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4135 browse_id = browse_endpoint.get('browseId')
4136 params = browse_endpoint.get('params')
4137 break
4138
4139 ytcfg = self.extract_ytcfg(item_id, webpage)
4140 headers = self.generate_api_headers(
4141 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4142 identity_token=self._extract_identity_token(webpage, item_id=item_id),
4143 visitor_data=try_get(
4144 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4145 query = {
4146 'params': params or 'wgYCCAA=',
4147 'browseId': browse_id or 'VL%s' % item_id
4148 }
4149 return self._extract_response(
4150 item_id=item_id, headers=headers, query=query,
4151 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4152 note='Downloading API JSON with unavailable videos')
4153
4154 def _extract_webpage(self, url, item_id):
4155 retries = self.get_param('extractor_retries', 3)
4156 count = -1
4157 last_error = 'Incomplete yt initial data recieved'
4158 while count < retries:
4159 count += 1
4160 # Sometimes youtube returns a webpage with incomplete ytInitialData
4161 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4162 if count:
4163 self.report_warning('%s. Retrying ...' % last_error)
4164 webpage = self._download_webpage(
4165 url, item_id,
4166 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4167 data = self.extract_yt_initial_data(item_id, webpage)
4168 if data.get('contents') or data.get('currentVideoEndpoint'):
4169 break
4170 # Extract alerts here only when there is error
4171 self._extract_and_report_alerts(data)
4172 if count >= retries:
4173 raise ExtractorError(last_error)
4174 return webpage, data
4175
4176 @staticmethod
4177 def _smuggle_data(entries, data):
4178 for entry in entries:
4179 if data:
4180 entry['url'] = smuggle_url(entry['url'], data)
4181 yield entry
4182
4183 def _real_extract(self, url):
4184 url, smuggled_data = unsmuggle_url(url, {})
4185 if self.is_music_url(url):
4186 smuggled_data['is_music_url'] = True
4187 info_dict = self.__real_extract(url, smuggled_data)
4188 if info_dict.get('entries'):
4189 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4190 return info_dict
4191
    # Splits a tab URL into pre/tab/post parts; the 'channel_type' and
    # 'not_channel' groups referenced here come from _VALID_URL.
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4193
    def __real_extract(self, url, smuggled_data):
        """
        Core tab/playlist extraction: normalizes the URL (music redirects,
        missing tab names, watch?list= mistakes), downloads the page and
        dispatches to tab, playlist or single-video handling.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Regex groups with no match become '' so downstream string
            # operations never see None
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Re-parse after the rewrites above so mobj reflects the final URL
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4308
4309
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything the tab extractor handles, and any URL carrying an
        # explicit video id, is not ours.
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Remember whether this came from a music domain before the URL is
        # normalized, then hand off to the tab extractor.
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        canonical = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if from_music:
            canonical = smuggle_url(canonical, {'is_music_url': True})
        return self.url_result(canonical, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4394
4395
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite short-link + list URLs into a canonical watch URL and let
        # the tab extractor decide between video and playlist.
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id'), mobj.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4434
4435
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Map the 'ytuser:<name>' shorthand onto the user's channel page.
        user_id = self._match_id(url)
        channel_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(channel_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4449
4450
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special 'LL' playlist.
        liked_playlist = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist, ie=YoutubeTabIE.ie_key())
4468
4469
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra encoded search parameters (e.g. sort order); None means default search
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query from the search API, following
        continuations across pages."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results under 'contents'; continuation pages
            # under 'onResponseReceivedCommands'
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Only plain video results are yielded; other renderer
                    # types (ads, shelves, channels) are skipped
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4537
4538
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE but sorts results newest-first via _SEARCH_PARAMS.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Encoded search filter selecting upload-date ordering
    _SEARCH_PARAMS = 'CAI%3D'
4544
4545
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        # Recover the search terms (and optional 'sp' filter) from the
        # results-page URL, then run a normal unbounded search.
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4572
4573
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the subclass's feed name.
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4590
4591
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later items live in the special 'WL' playlist.
        watch_later = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later, ie=YoutubeTabIE.ie_key())
4604
4605
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage (optionally with ?/# suffix)
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Recommendations work without login, unlike the other feeds
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4621
4622
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the logged-in user's subscriptions feed.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4634
4635
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the logged-in user's watch history.
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4644
4645
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs whose 'v=' parameter was lost (typically an unquoted
    # '&' in the shell) and raises a helpful error instead of failing obscurely.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a matching URL cannot contain a video id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4693
4694
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than the required 11
    # characters (i.e. got cut off) and raises a clear error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)