yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import base64
   6 import calendar
   7 import copy
   8 import datetime
   9 import hashlib
  10 import itertools
  11 import json
  12 import os.path
  13 import random
  14 import re
  15 import time
  16 import traceback
  17
  18 from .common import InfoExtractor, SearchInfoExtractor
  19 from ..compat import (
  20     compat_chr,
  21     compat_HTTPError,
  22     compat_parse_qs,
  23     compat_str,
  24     compat_urllib_parse_unquote_plus,
  25     compat_urllib_parse_urlencode,
  26     compat_urllib_parse_urlparse,
  27     compat_urlparse,
  28 )
  29 from ..jsinterp import JSInterpreter
  30 from ..utils import (
  31     bytes_to_intlist,
  32     clean_html,
  33     datetime_from_str,
  34     dict_get,
  35     error_to_compat_str,
  36     ExtractorError,
  37     float_or_none,
  38     format_field,
  39     int_or_none,
  40     intlist_to_bytes,
  41     mimetype2ext,
  42     network_exceptions,
  43     orderedSet,
  44     parse_codecs,
  45     parse_count,
  46     parse_duration,
  47     parse_iso8601,
  48     qualities,
  49     remove_start,
  50     smuggle_url,
  51     str_or_none,
  52     str_to_int,
  53     traverse_obj,
  54     try_get,
  55     unescapeHTML,
  56     unified_strdate,
  57     unsmuggle_url,
  58     update_url_query,
  59     url_or_none,
  60     urlencode_postdata,
  61     urljoin,
  62     variadic,
  63 )
  64
  65
  66 def parse_qs(url):
  67     return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  68
  69
  70 class YoutubeBaseInfoExtractor(InfoExtractor):
  71     """Provide base functions for Youtube extractors"""
  72     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  73     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  74
  75     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  76     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  77     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  78
  79     _RESERVED_NAMES = (
  80         r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
  81         r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
  82         r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
  83
  84     _NETRC_MACHINE = 'youtube'
  85     # If True it will raise an error if no login info is provided
  86     _LOGIN_REQUIRED = False
  87
  88     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
  89
  90     def _login(self):
  91         """
  92         Attempt to log in to YouTube.
  93         True is returned if successful or skipped.
  94         False is returned if login failed.
  95
  96         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  97         """
  98
  99         def warn(message):
 100             self.report_warning(message)
 101
 102         # username+password login is broken
 103         if (self._LOGIN_REQUIRED
 104                 and self.get_param('cookiefile') is None
 105                 and self.get_param('cookiesfrombrowser') is None):
 106             self.raise_login_required(
 107                 'Login details are needed to download this content', method='cookies')
 108         username, password = self._get_login_info()
 109         if username:
 110             warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
 111         return
 112
 113         # Everything below this is broken!
 114         r'''
 115         # No authentication to be performed
 116         if username is None:
 117             if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
 118                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 119             # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 120             #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 121             return True
 122
 123         login_page = self._download_webpage(
 124             self._LOGIN_URL, None,
 125             note='Downloading login page',
 126             errnote='unable to fetch login page', fatal=False)
 127         if login_page is False:
 128             return
 129
 130         login_form = self._hidden_inputs(login_page)
 131
 132         def req(url, f_req, note, errnote):
 133             data = login_form.copy()
 134             data.update({
 135                 'pstMsg': 1,
 136                 'checkConnection': 'youtube',
 137                 'checkedDomains': 'youtube',
 138                 'hl': 'en',
 139                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 140                 'f.req': json.dumps(f_req),
 141                 'flowName': 'GlifWebSignIn',
 142                 'flowEntry': 'ServiceLogin',
 143                 # TODO: reverse actual botguard identifier generation algo
 144                 'bgRequest': '["identifier",""]',
 145             })
 146             return self._download_json(
 147                 url, None, note=note, errnote=errnote,
 148                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 149                 fatal=False,
 150                 data=urlencode_postdata(data), headers={
 151                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 152                     'Google-Accounts-XSRF': 1,
 153                 })
 154
 155         lookup_req = [
 156             username,
 157             None, [], None, 'US', None, None, 2, False, True,
 158             [
 159                 None, None,
 160                 [2, 1, None, 1,
 161                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 162                  None, [], 4],
 163                 1, [None, None, []], None, None, None, True
 164             ],
 165             username,
 166         ]
 167
 168         lookup_results = req(
 169             self._LOOKUP_URL, lookup_req,
 170             'Looking up account info', 'Unable to look up account info')
 171
 172         if lookup_results is False:
 173             return False
 174
 175         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 176         if not user_hash:
 177             warn('Unable to extract user hash')
 178             return False
 179
 180         challenge_req = [
 181             user_hash,
 182             None, 1, None, [1, None, None, None, [password, None, True]],
 183             [
 184                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 185                 1, [None, None, []], None, None, None, True
 186             ]]
 187
 188         challenge_results = req(
 189             self._CHALLENGE_URL, challenge_req,
 190             'Logging in', 'Unable to log in')
 191
 192         if challenge_results is False:
 193             return
 194
 195         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 196         if login_res:
 197             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 198             warn(
 199                 'Unable to login: %s' % 'Invalid password'
 200                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 201             return False
 202
 203         res = try_get(challenge_results, lambda x: x[0][-1], list)
 204         if not res:
 205             warn('Unable to extract result entry')
 206             return False
 207
 208         login_challenge = try_get(res, lambda x: x[0][0], list)
 209         if login_challenge:
 210             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 211             if challenge_str == 'TWO_STEP_VERIFICATION':
 212                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 213                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 214                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 215                 if status == 'QUOTA_EXCEEDED':
 216                     warn('Exceeded the limit of TFA codes, try later')
 217                     return False
 218
 219                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 220                 if not tl:
 221                     warn('Unable to extract TL')
 222                     return False
 223
 224                 tfa_code = self._get_tfa_info('2-step verification code')
 225
 226                 if not tfa_code:
 227                     warn(
 228                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 229                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 230                     return False
 231
 232                 tfa_code = remove_start(tfa_code, 'G-')
 233
 234                 tfa_req = [
 235                     user_hash, None, 2, None,
 236                     [
 237                         9, None, None, None, None, None, None, None,
 238                         [None, tfa_code, True, 2]
 239                     ]]
 240
 241                 tfa_results = req(
 242                     self._TFA_URL.format(tl), tfa_req,
 243                     'Submitting TFA code', 'Unable to submit TFA code')
 244
 245                 if tfa_results is False:
 246                     return False
 247
 248                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 249                 if tfa_res:
 250                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 251                     warn(
 252                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 253                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 254                     return False
 255
 256                 check_cookie_url = try_get(
 257                     tfa_results, lambda x: x[0][-1][2], compat_str)
 258             else:
 259                 CHALLENGES = {
 260                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 261                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 262                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 263                 }
 264                 challenge = CHALLENGES.get(
 265                     challenge_str,
 266                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 267                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 268                 return False
 269         else:
 270             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 271
 272         if not check_cookie_url:
 273             warn('Unable to extract CheckCookie URL')
 274             return False
 275
 276         check_cookie_results = self._download_webpage(
 277             check_cookie_url, None, 'Checking cookie', fatal=False)
 278
 279         if check_cookie_results is False:
 280             return False
 281
 282         if 'https://myaccount.google.com/' not in check_cookie_results:
 283             warn('Unable to log in')
 284             return False
 285
 286         return True
 287         '''
 288
 289     def _initialize_consent(self):
 290         cookies = self._get_cookies('https://www.youtube.com/')
 291         if cookies.get('__Secure-3PSID'):
 292             return
 293         consent_id = None
 294         consent = cookies.get('CONSENT')
 295         if consent:
 296             if 'YES' in consent.value:
 297                 return
 298             consent_id = self._search_regex(
 299                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 300         if not consent_id:
 301             consent_id = random.randint(100, 999)
 302         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 303
 304     def _real_initialize(self):
 305         self._initialize_consent()
 306         if self._downloader is None:
 307             return
 308         if not self._login():
 309             return
 310
 311     _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
 312     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
 313     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 314
 315     _YT_DEFAULT_YTCFGS = {
 316         'WEB': {
 317             'INNERTUBE_API_VERSION': 'v1',
 318             'INNERTUBE_CLIENT_NAME': 'WEB',
 319             'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
 320             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 321             'INNERTUBE_CONTEXT': {
 322                 'client': {
 323                     'clientName': 'WEB',
 324                     'clientVersion': '2.20210622.10.00',
 325                     'hl': 'en',
 326                 }
 327             },
 328             'INNERTUBE_CONTEXT_CLIENT_NAME': 1
 329         },
 330         'WEB_REMIX': {
 331             'INNERTUBE_API_VERSION': 'v1',
 332             'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
 333             'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
 334             'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 335             'INNERTUBE_CONTEXT': {
 336                 'client': {
 337                     'clientName': 'WEB_REMIX',
 338                     'clientVersion': '1.20210621.00.00',
 339                     'hl': 'en',
 340                 }
 341             },
 342             'INNERTUBE_CONTEXT_CLIENT_NAME': 67
 343         },
 344         'WEB_EMBEDDED_PLAYER': {
 345             'INNERTUBE_API_VERSION': 'v1',
 346             'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
 347             'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
 348             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 349             'INNERTUBE_CONTEXT': {
 350                 'client': {
 351                     'clientName': 'WEB_EMBEDDED_PLAYER',
 352                     'clientVersion': '1.20210620.0.1',
 353                     'hl': 'en',
 354                 }
 355             },
 356             'INNERTUBE_CONTEXT_CLIENT_NAME': 56
 357         },
 358         'ANDROID': {
 359             'INNERTUBE_API_VERSION': 'v1',
 360             'INNERTUBE_CLIENT_NAME': 'ANDROID',
 361             'INNERTUBE_CLIENT_VERSION': '16.20',
 362             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 363             'INNERTUBE_CONTEXT': {
 364                 'client': {
 365                     'clientName': 'ANDROID',
 366                     'clientVersion': '16.20',
 367                     'hl': 'en',
 368                 }
 369             },
 370             'INNERTUBE_CONTEXT_CLIENT_NAME': 3
 371         },
 372         'ANDROID_EMBEDDED_PLAYER': {
 373             'INNERTUBE_API_VERSION': 'v1',
 374             'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
 375             'INNERTUBE_CLIENT_VERSION': '16.20',
 376             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 377             'INNERTUBE_CONTEXT': {
 378                 'client': {
 379                     'clientName': 'ANDROID_EMBEDDED_PLAYER',
 380                     'clientVersion': '16.20',
 381                     'hl': 'en',
 382                 }
 383             },
 384             'INNERTUBE_CONTEXT_CLIENT_NAME': 55
 385         },
 386         'ANDROID_MUSIC': {
 387             'INNERTUBE_API_VERSION': 'v1',
 388             'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
 389             'INNERTUBE_CLIENT_VERSION': '4.32',
 390             'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 391             'INNERTUBE_CONTEXT': {
 392                 'client': {
 393                     'clientName': 'ANDROID_MUSIC',
 394                     'clientVersion': '4.32',
 395                     'hl': 'en',
 396                 }
 397             },
 398             'INNERTUBE_CONTEXT_CLIENT_NAME': 21
 399         },
 400         'IOS': {
 401             'INNERTUBE_API_VERSION': 'v1',
 402             'INNERTUBE_CLIENT_NAME': 'IOS',
 403             'INNERTUBE_CLIENT_VERSION': '16.20',
 404             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 405             'INNERTUBE_CONTEXT': {
 406                 'client': {
 407                     'clientName': 'IOS',
 408                     'clientVersion': '16.20',
 409                     'hl': 'en',
 410                 }
 411             },
 412             'INNERTUBE_CONTEXT_CLIENT_NAME': 5
 413
 414         },
 415         'IOS_MUSIC': {
 416             'INNERTUBE_API_VERSION': 'v1',
 417             'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
 418             'INNERTUBE_CLIENT_VERSION': '4.32',
 419             'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
 420             'INNERTUBE_CONTEXT': {
 421                 'client': {
 422                     'clientName': 'IOS_MUSIC',
 423                     'clientVersion': '4.32',
 424                     'hl': 'en',
 425                 }
 426             },
 427             'INNERTUBE_CONTEXT_CLIENT_NAME': 26
 428         },
 429         'IOS_MESSAGES_EXTENSION': {
 430             'INNERTUBE_API_VERSION': 'v1',
 431             'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
 432             'INNERTUBE_CLIENT_VERSION': '16.20',
 433             'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 434             'INNERTUBE_CONTEXT': {
 435                 'client': {
 436                     'clientName': 'IOS_MESSAGES_EXTENSION',
 437                     'clientVersion': '16.20',
 438                     'hl': 'en',
 439                 }
 440             },
 441             'INNERTUBE_CONTEXT_CLIENT_NAME': 66
 442         }
 443     }
 444
 445     _YT_DEFAULT_INNERTUBE_HOSTS = {
 446         'DIRECT': 'youtubei.googleapis.com',
 447         'WEB': 'www.youtube.com',
 448         'WEB_REMIX': 'music.youtube.com',
 449         'ANDROID_MUSIC': 'music.youtube.com'
 450     }
 451
 452     # clients starting with _ cannot be explicity requested by the user
 453     _YT_CLIENTS = {
 454         'web': 'WEB',
 455         'web_music': 'WEB_REMIX',
 456         '_web_embedded': 'WEB_EMBEDDED_PLAYER',
 457         '_web_agegate': 'TVHTML5',
 458         'android': 'ANDROID',
 459         'android_music': 'ANDROID_MUSIC',
 460         '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
 461         '_android_agegate': 'ANDROID',
 462         'ios': 'IOS',
 463         'ios_music': 'IOS_MUSIC',
 464         '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
 465         '_ios_agegate': 'IOS'
 466     }
 467
 468     def _get_default_ytcfg(self, client='WEB'):
 469         if client in self._YT_DEFAULT_YTCFGS:
 470             return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
 471         self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
 472         return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
 473
 474     def _get_innertube_host(self, client='WEB'):
 475         return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
 476
 477     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
 478         # try_get but with fallback to default ytcfg client values when present
 479         _func = lambda y: try_get(y, getter, expected_type)
 480         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 481
 482     def _extract_client_name(self, ytcfg, default_client='WEB'):
 483         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
 484
 485     @staticmethod
 486     def _extract_session_index(*data):
 487         for ytcfg in data:
 488             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 489             if session_index is not None:
 490                 return session_index
 491
 492     def _extract_client_version(self, ytcfg, default_client='WEB'):
 493         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
 494
 495     def _extract_api_key(self, ytcfg=None, default_client='WEB'):
 496         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 497
 498     def _extract_context(self, ytcfg=None, default_client='WEB'):
 499         _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
 500         context = _get_context(ytcfg)
 501         if context:
 502             return context
 503
 504         context = _get_context(self._get_default_ytcfg(default_client))
 505         if not ytcfg:
 506             return context
 507
 508         # Recreate the client context (required)
 509         context['client'].update({
 510             'clientVersion': self._extract_client_version(ytcfg, default_client),
 511             'clientName': self._extract_client_name(ytcfg, default_client),
 512         })
 513         visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
 514         if visitor_data:
 515             context['client']['visitorData'] = visitor_data
 516         return context
 517
 518     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 519         # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 520         # See: https://github.com/yt-dlp/yt-dlp/issues/393
 521         yt_cookies = self._get_cookies('https://www.youtube.com')
 522         sapisid_cookie = dict_get(
 523             yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 524         if sapisid_cookie is None or not sapisid_cookie.value:
 525             return
 526         time_now = round(time.time())
 527         # SAPISID cookie is required if not already present
 528         if not yt_cookies.get('SAPISID'):
 529             self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
 530             self._set_cookie(
 531                 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
 532         self.write_debug('Extracted SAPISID cookie', only_once=True)
 533         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 534         sapisidhash = hashlib.sha1(
 535             f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
 536         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 537
 538     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 539                   note='Downloading API JSON', errnote='Unable to download API page',
 540                   context=None, api_key=None, api_hostname=None, default_client='WEB'):
 541
 542         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 543         data.update(query)
 544         real_headers = self.generate_api_headers(default_client=default_client)
 545         real_headers.update({'content-type': 'application/json'})
 546         if headers:
 547             real_headers.update(headers)
 548         return self._download_json(
 549             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 550             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 551             data=json.dumps(data).encode('utf8'), headers=real_headers,
 552             query={'key': api_key or self._extract_api_key()})
 553
 554     def extract_yt_initial_data(self, video_id, webpage):
 555         return self._parse_json(
 556             self._search_regex(
 557                 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 558                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 559             video_id)
 560
 561     def _extract_identity_token(self, webpage, item_id):
 562         if not webpage:
 563             return None
 564         ytcfg = self.extract_ytcfg(item_id, webpage)
 565         if ytcfg:
 566             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 567             if token:
 568                 return token
 569         return self._search_regex(
 570             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 571             'identity token', default=None)
 572
 573     @staticmethod
 574     def _extract_account_syncid(*args):
 575         """
 576         Extract syncId required to download private playlists of secondary channels
 577         @params response and/or ytcfg
 578         """
 579         for data in args:
 580             # ytcfg includes channel_syncid if on secondary channel
 581             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 582             if delegated_sid:
 583                 return delegated_sid
 584             sync_ids = (try_get(
 585                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 586                        lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
 587             if len(sync_ids) >= 2 and sync_ids[1]:
 588                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 589                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 590                 return sync_ids[0]
 591
 592     def extract_ytcfg(self, video_id, webpage):
 593         if not webpage:
 594             return {}
 595         return self._parse_json(
 596             self._search_regex(
 597                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 598                 default='{}'), video_id, fatal=False) or {}
 599
 600     def generate_api_headers(
 601             self, ytcfg=None, identity_token=None, account_syncid=None,
 602             visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
 603         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 604         headers = {
 605             'X-YouTube-Client-Name': compat_str(
 606                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 607             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 608             'Origin': origin
 609         }
 610         if not visitor_data and ytcfg:
 611             visitor_data = try_get(
 612                 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
 613         if identity_token:
 614             headers['X-Youtube-Identity-Token'] = identity_token
 615         if account_syncid:
 616             headers['X-Goog-PageId'] = account_syncid
 617         if session_index is None and ytcfg:
 618             session_index = self._extract_session_index(ytcfg)
 619         if account_syncid or session_index is not None:
 620             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 621         if visitor_data:
 622             headers['X-Goog-Visitor-Id'] = visitor_data
 623         auth = self._generate_sapisidhash_header(origin)
 624         if auth is not None:
 625             headers['Authorization'] = auth
 626             headers['X-Origin'] = origin
 627         return headers
 628
 629     @staticmethod
 630     def _build_api_continuation_query(continuation, ctp=None):
 631         query = {
 632             'continuation': continuation
 633         }
 634         # TODO: Inconsistency with clickTrackingParams.
 635         # Currently we have a fixed ctp contained within context (from ytcfg)
 636         # and a ctp in root query for continuation.
 637         if ctp:
 638             query['clickTracking'] = {'clickTrackingParams': ctp}
 639         return query
 640
 641     @classmethod
 642     def _extract_next_continuation_data(cls, renderer):
 643         next_continuation = try_get(
 644             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 645                        lambda x: x['continuation']['reloadContinuationData']), dict)
 646         if not next_continuation:
 647             return
 648         continuation = next_continuation.get('continuation')
 649         if not continuation:
 650             return
 651         ctp = next_continuation.get('clickTrackingParams')
 652         return cls._build_api_continuation_query(continuation, ctp)
 653
 654     @classmethod
 655     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 656         if isinstance(continuation_ep, dict):
 657             continuation = try_get(
 658                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 659             if not continuation:
 660                 return
 661             ctp = continuation_ep.get('clickTrackingParams')
 662             return cls._build_api_continuation_query(continuation, ctp)
 663
 664     @classmethod
 665     def _extract_continuation(cls, renderer):
 666         next_continuation = cls._extract_next_continuation_data(renderer)
 667         if next_continuation:
 668             return next_continuation
 669
 670         contents = []
 671         for key in ('contents', 'items'):
 672             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 673
 674         for content in contents:
 675             if not isinstance(content, dict):
 676                 continue
 677             continuation_ep = try_get(
 678                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 679                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 680                 dict)
 681             continuation = cls._extract_continuation_ep_data(continuation_ep)
 682             if continuation:
 683                 return continuation
 684
 685     @classmethod
 686     def _extract_alerts(cls, data):
 687         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 688             if not isinstance(alert_dict, dict):
 689                 continue
 690             for alert in alert_dict.values():
 691                 alert_type = alert.get('type')
 692                 if not alert_type:
 693                     continue
 694                 message = cls._get_text(alert.get('text'))
 695                 if message:
 696                     yield alert_type, message
 697
 698     def _report_alerts(self, alerts, expected=True):
 699         errors = []
 700         warnings = []
 701         for alert_type, alert_message in alerts:
 702             if alert_type.lower() == 'error':
 703                 errors.append([alert_type, alert_message])
 704             else:
 705                 warnings.append([alert_type, alert_message])
 706
 707         for alert_type, alert_message in (warnings + errors[:-1]):
 708             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
 709         if errors:
 710             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 711
 712     def _extract_and_report_alerts(self, data, *args, **kwargs):
 713         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 714
 715     def _extract_badges(self, renderer: dict):
 716         badges = set()
 717         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 718             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 719             if label:
 720                 badges.add(label.lower())
 721         return badges
 722
 723     @staticmethod
 724     def _get_text(data, getter=None, max_runs=None):
 725         for get in variadic(getter):
 726             d = try_get(data, get) if get is not None else data
 727             text = try_get(d, lambda x: x['simpleText'], compat_str)
 728             if text:
 729                 return text
 730             runs = try_get(d, lambda x: x['runs'], list) or []
 731             if not runs and isinstance(d, list):
 732                 runs = d
 733
 734             def get_runs(runs):
 735                 for run in runs[:min(len(runs), max_runs or len(runs))]:
 736                     yield try_get(run, lambda x: x['text'], compat_str) or ''
 737
 738             text = ''.join(get_runs(runs))
 739             if text:
 740                 return text
 741
 742     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
 743                           ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
 744                           default_client='WEB'):
 745         response = None
 746         last_error = None
 747         count = -1
 748         retries = self.get_param('extractor_retries', 3)
 749         if check_get_keys is None:
 750             check_get_keys = []
 751         while count < retries:
 752             count += 1
 753             if last_error:
 754                 self.report_warning('%s. Retrying ...' % last_error)
 755             try:
 756                 response = self._call_api(
 757                     ep=ep, fatal=True, headers=headers,
 758                     video_id=item_id, query=query,
 759                     context=self._extract_context(ytcfg, default_client),
 760                     api_key=self._extract_api_key(ytcfg, default_client),
 761                     api_hostname=api_hostname, default_client=default_client,
 762                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
 763             except ExtractorError as e:
 764                 if isinstance(e.cause, network_exceptions):
 765                     # Downloading page may result in intermittent 5xx HTTP error
 766                     # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
 767                     # We also want to catch all other network exceptions since errors in later pages can be troublesome
 768                     # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
 769                     if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
 770                         last_error = error_to_compat_str(e.cause or e)
 771                         if count < retries:
 772                             continue
 773                 if fatal:
 774                     raise
 775                 else:
 776                     self.report_warning(error_to_compat_str(e))
 777                     return
 778
 779             else:
 780                 # Youtube may send alerts if there was an issue with the continuation page
 781                 try:
 782                     self._extract_and_report_alerts(response, expected=False)
 783                 except ExtractorError as e:
 784                     if fatal:
 785                         raise
 786                     self.report_warning(error_to_compat_str(e))
 787                     return
 788                 if not check_get_keys or dict_get(response, check_get_keys):
 789                     break
 790                 # Youtube sometimes sends incomplete data
 791                 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
 792                 last_error = 'Incomplete data received'
 793                 if count >= retries:
 794                     if fatal:
 795                         raise ExtractorError(last_error)
 796                     else:
 797                         self.report_warning(last_error)
 798                         return
 799         return response
 800
 801     @staticmethod
 802     def is_music_url(url):
 803         return re.match(r'https?://music\.youtube\.com/', url) is not None
 804
 805     def _extract_video(self, renderer):
 806         video_id = renderer.get('videoId')
 807         title = self._get_text(renderer.get('title'))
 808         description = self._get_text(renderer.get('descriptionSnippet'))
 809         duration = parse_duration(self._get_text(renderer.get('lengthText')))
 810         view_count_text = self._get_text(renderer.get('viewCountText')) or ''
 811         view_count = str_to_int(self._search_regex(
 812             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 813             'view count', default=None))
 814
 815         uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
 816
 817         return {
 818             '_type': 'url',
 819             'ie_key': YoutubeIE.ie_key(),
 820             'id': video_id,
 821             'url': video_id,
 822             'title': title,
 823             'description': description,
 824             'duration': duration,
 825             'view_count': view_count,
 826             'uploader': uploader,
 827         }
 828
 829
 830 class YoutubeIE(YoutubeBaseInfoExtractor):
 831     IE_DESC = 'YouTube.com'
 832     _INVIDIOUS_SITES = (
 833         # invidious-redirect websites
 834         r'(?:www\.)?redirect\.invidious\.io',
 835         r'(?:(?:www|dev)\.)?invidio\.us',
 836         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 837         r'(?:www\.)?invidious\.pussthecat\.org',
 838         r'(?:www\.)?invidious\.zee\.li',
 839         r'(?:www\.)?invidious\.ethibox\.fr',
 840         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 841         # youtube-dl invidious instances list
 842         r'(?:(?:www|no)\.)?invidiou\.sh',
 843         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 844         r'(?:www\.)?invidious\.kabi\.tk',
 845         r'(?:www\.)?invidious\.mastodon\.host',
 846         r'(?:www\.)?invidious\.zapashcanon\.fr',
 847         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 848         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 849         r'(?:www\.)?invidious\.himiko\.cloud',
 850         r'(?:www\.)?invidious\.reallyancient\.tech',
 851         r'(?:www\.)?invidious\.tube',
 852         r'(?:www\.)?invidiou\.site',
 853         r'(?:www\.)?invidious\.site',
 854         r'(?:www\.)?invidious\.xyz',
 855         r'(?:www\.)?invidious\.nixnet\.xyz',
 856         r'(?:www\.)?invidious\.048596\.xyz',
 857         r'(?:www\.)?invidious\.drycat\.fr',
 858         r'(?:www\.)?inv\.skyn3t\.in',
 859         r'(?:www\.)?tube\.poal\.co',
 860         r'(?:www\.)?tube\.connect\.cafe',
 861         r'(?:www\.)?vid\.wxzm\.sx',
 862         r'(?:www\.)?vid\.mint\.lgbt',
 863         r'(?:www\.)?vid\.puffyan\.us',
 864         r'(?:www\.)?yewtu\.be',
 865         r'(?:www\.)?yt\.elukerio\.org',
 866         r'(?:www\.)?yt\.lelux\.fi',
 867         r'(?:www\.)?invidious\.ggc-project\.de',
 868         r'(?:www\.)?yt\.maisputain\.ovh',
 869         r'(?:www\.)?ytprivate\.com',
 870         r'(?:www\.)?invidious\.13ad\.de',
 871         r'(?:www\.)?invidious\.toot\.koeln',
 872         r'(?:www\.)?invidious\.fdn\.fr',
 873         r'(?:www\.)?watch\.nettohikari\.com',
 874         r'(?:www\.)?invidious\.namazso\.eu',
 875         r'(?:www\.)?invidious\.silkky\.cloud',
 876         r'(?:www\.)?invidious\.exonip\.de',
 877         r'(?:www\.)?invidious\.riverside\.rocks',
 878         r'(?:www\.)?invidious\.blamefran\.net',
 879         r'(?:www\.)?invidious\.moomoo\.de',
 880         r'(?:www\.)?ytb\.trom\.tf',
 881         r'(?:www\.)?yt\.cyberhost\.uk',
 882         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 883         r'(?:www\.)?qklhadlycap4cnod\.onion',
 884         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 885         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 886         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 887         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 888         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 889         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 890         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 891         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 892         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 893         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 894     )
 895     _VALID_URL = r"""(?x)^
 896                      (
 897                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 898                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 899                             (?:www\.)?deturl\.com/www\.youtube\.com|
 900                             (?:www\.)?pwnyoutube\.com|
 901                             (?:www\.)?hooktube\.com|
 902                             (?:www\.)?yourepeat\.com|
 903                             tube\.majestyc\.net|
 904                             %(invidious)s|
 905                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 906                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 907                          (?:                                                  # the various things that can precede the ID:
 908                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 909                              |(?:                                             # or the v= param in all its forms
 910                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 911                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 912                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 913                                  v=
 914                              )
 915                          ))
 916                          |(?:
 917                             youtu\.be|                                        # just youtu.be/xxxx
 918                             vid\.plus|                                        # or vid.plus/xxxx
 919                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 920                             %(invidious)s
 921                          )/
 922                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 923                          )
 924                      )?                                                       # all until now is optional -> you can pass the naked ID
 925                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 926                      (?(1).+)?                                                # if we found the ID, everything can follow
 927                      (?:\#|$)""" % {
 928         'invidious': '|'.join(_INVIDIOUS_SITES),
 929     }
 930     _PLAYER_INFO_RE = (
 931         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 932         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 933         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 934     )
 935     _formats = {
 936         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 937         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 938         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 939         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 940         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 941         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 942         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 943         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 944         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 945         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 946         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 947         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 948         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 949         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 950         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 951         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 952         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 953         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 954
 955
 956         # 3D videos
 957         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 958         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 959         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 960         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 961         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 962         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 963         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 964
 965         # Apple HTTP Live Streaming
 966         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 967         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 968         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 969         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 970         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 971         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 972         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 973         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 974
 975         # DASH mp4 video
 976         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 977         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 978         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 979         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 980         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 981         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 982         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 983         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 984         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 985         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 986         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 987         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 988
 989         # Dash mp4 audio
 990         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 991         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 992         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 993         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 994         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 995         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 996         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 997
 998         # Dash webm
 999         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1004         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1006         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1015         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1017         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1018         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1019         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1021
1022         # Dash webm audio
1023         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1024         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1025
1026         # Dash webm audio with opus inside
1027         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1028         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1029         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1030
1031         # RTMP (unnamed)
1032         '_rtmp': {'protocol': 'rtmp'},
1033
1034         # av01 video only formats sometimes served with "unknown" codecs
1035         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1036         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1037         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1038         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1039     }
1040     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1041
1042     _AGE_GATE_REASONS = (
1043         'Sign in to confirm your age',
1044         'This video may be inappropriate for some users.',
1045         'Sorry, this content is age-restricted.')
1046
1047     _GEO_BYPASS = False
1048
1049     IE_NAME = 'youtube'
1050     _TESTS = [
1051         {
1052             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1053             'info_dict': {
1054                 'id': 'BaW_jenozKc',
1055                 'ext': 'mp4',
1056                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1057                 'uploader': 'Philipp Hagemeister',
1058                 'uploader_id': 'phihag',
1059                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1060                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1061                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1062                 'upload_date': '20121002',
1063                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1064                 'categories': ['Science & Technology'],
1065                 'tags': ['youtube-dl'],
1066                 'duration': 10,
1067                 'view_count': int,
1068                 'like_count': int,
1069                 'dislike_count': int,
1070                 'start_time': 1,
1071                 'end_time': 9,
1072             }
1073         },
1074         {
1075             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1076             'note': 'Embed-only video (#1746)',
1077             'info_dict': {
1078                 'id': 'yZIXLfi8CZQ',
1079                 'ext': 'mp4',
1080                 'upload_date': '20120608',
1081                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1082                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1083                 'uploader': 'SET India',
1084                 'uploader_id': 'setindia',
1085                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1086                 'age_limit': 18,
1087             },
1088             'skip': 'Private video',
1089         },
1090         {
1091             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1092             'note': 'Use the first video ID in the URL',
1093             'info_dict': {
1094                 'id': 'BaW_jenozKc',
1095                 'ext': 'mp4',
1096                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1097                 'uploader': 'Philipp Hagemeister',
1098                 'uploader_id': 'phihag',
1099                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1100                 'upload_date': '20121002',
1101                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1102                 'categories': ['Science & Technology'],
1103                 'tags': ['youtube-dl'],
1104                 'duration': 10,
1105                 'view_count': int,
1106                 'like_count': int,
1107                 'dislike_count': int,
1108             },
1109             'params': {
1110                 'skip_download': True,
1111             },
1112         },
1113         {
1114             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1115             'note': '256k DASH audio (format 141) via DASH manifest',
1116             'info_dict': {
1117                 'id': 'a9LDPn-MO4I',
1118                 'ext': 'm4a',
1119                 'upload_date': '20121002',
1120                 'uploader_id': '8KVIDEO',
1121                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1122                 'description': '',
1123                 'uploader': '8KVIDEO',
1124                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1125             },
1126             'params': {
1127                 'youtube_include_dash_manifest': True,
1128                 'format': '141',
1129             },
1130             'skip': 'format 141 not served anymore',
1131         },
1132         # DASH manifest with encrypted signature
1133         {
1134             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1135             'info_dict': {
1136                 'id': 'IB3lcPjvWLA',
1137                 'ext': 'm4a',
1138                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1139                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1140                 'duration': 244,
1141                 'uploader': 'AfrojackVEVO',
1142                 'uploader_id': 'AfrojackVEVO',
1143                 'upload_date': '20131011',
1144                 'abr': 129.495,
1145             },
1146             'params': {
1147                 'youtube_include_dash_manifest': True,
1148                 'format': '141/bestaudio[ext=m4a]',
1149             },
1150         },
1151         # Normal age-gate video (embed allowed)
1152         {
1153             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1154             'info_dict': {
1155                 'id': 'HtVdAasjOgU',
1156                 'ext': 'mp4',
1157                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1158                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1159                 'duration': 142,
1160                 'uploader': 'The Witcher',
1161                 'uploader_id': 'WitcherGame',
1162                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1163                 'upload_date': '20140605',
1164                 'age_limit': 18,
1165             },
1166         },
1167         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1168         # YouTube Red ad is not captured for creator
1169         {
1170             'url': '__2ABJjxzNo',
1171             'info_dict': {
1172                 'id': '__2ABJjxzNo',
1173                 'ext': 'mp4',
1174                 'duration': 266,
1175                 'upload_date': '20100430',
1176                 'uploader_id': 'deadmau5',
1177                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1178                 'creator': 'deadmau5',
1179                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1180                 'uploader': 'deadmau5',
1181                 'title': 'Deadmau5 - Some Chords (HD)',
1182                 'alt_title': 'Some Chords',
1183             },
1184             'expected_warnings': [
1185                 'DASH manifest missing',
1186             ]
1187         },
1188         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1189         {
1190             'url': 'lqQg6PlCWgI',
1191             'info_dict': {
1192                 'id': 'lqQg6PlCWgI',
1193                 'ext': 'mp4',
1194                 'duration': 6085,
1195                 'upload_date': '20150827',
1196                 'uploader_id': 'olympic',
1197                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1198                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1199                 'uploader': 'Olympics',
1200                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1201             },
1202             'params': {
1203                 'skip_download': 'requires avconv',
1204             }
1205         },
1206         # Non-square pixels
1207         {
1208             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1209             'info_dict': {
1210                 'id': '_b-2C3KPAM0',
1211                 'ext': 'mp4',
1212                 'stretched_ratio': 16 / 9.,
1213                 'duration': 85,
1214                 'upload_date': '20110310',
1215                 'uploader_id': 'AllenMeow',
1216                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1217                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1218                 'uploader': '孫ᄋᄅ',
1219                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1220             },
1221         },
1222         # url_encoded_fmt_stream_map is empty string
1223         {
1224             'url': 'qEJwOuvDf7I',
1225             'info_dict': {
1226                 'id': 'qEJwOuvDf7I',
1227                 'ext': 'webm',
1228                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1229                 'description': '',
1230                 'upload_date': '20150404',
1231                 'uploader_id': 'spbelect',
1232                 'uploader': 'Наблюдатели Петербурга',
1233             },
1234             'params': {
1235                 'skip_download': 'requires avconv',
1236             },
1237             'skip': 'This live event has ended.',
1238         },
1239         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1240         {
1241             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1242             'info_dict': {
1243                 'id': 'FIl7x6_3R5Y',
1244                 'ext': 'webm',
1245                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1246                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1247                 'duration': 220,
1248                 'upload_date': '20150625',
1249                 'uploader_id': 'dorappi2000',
1250                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1251                 'uploader': 'dorappi2000',
1252                 'formats': 'mincount:31',
1253             },
1254             'skip': 'not actual anymore',
1255         },
1256         # DASH manifest with segment_list
1257         {
1258             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1259             'md5': '8ce563a1d667b599d21064e982ab9e31',
1260             'info_dict': {
1261                 'id': 'CsmdDsKjzN8',
1262                 'ext': 'mp4',
1263                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1264                 'uploader': 'Airtek',
1265                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1266                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1267                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1268             },
1269             'params': {
1270                 'youtube_include_dash_manifest': True,
1271                 'format': '135',  # bestvideo
1272             },
1273             'skip': 'This live event has ended.',
1274         },
1275         {
1276             # Multifeed videos (multiple cameras), URL is for Main Camera
1277             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1278             'info_dict': {
1279                 'id': 'jvGDaLqkpTg',
1280                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1281                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1282             },
1283             'playlist': [{
1284                 'info_dict': {
1285                     'id': 'jvGDaLqkpTg',
1286                     'ext': 'mp4',
1287                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1288                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1289                     'duration': 10643,
1290                     'upload_date': '20161111',
1291                     'uploader': 'Team PGP',
1292                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1293                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1294                 },
1295             }, {
1296                 'info_dict': {
1297                     'id': '3AKt1R1aDnw',
1298                     'ext': 'mp4',
1299                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1300                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1301                     'duration': 10991,
1302                     'upload_date': '20161111',
1303                     'uploader': 'Team PGP',
1304                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1305                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1306                 },
1307             }, {
1308                 'info_dict': {
1309                     'id': 'RtAMM00gpVc',
1310                     'ext': 'mp4',
1311                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1312                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1313                     'duration': 10995,
1314                     'upload_date': '20161111',
1315                     'uploader': 'Team PGP',
1316                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1317                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1318                 },
1319             }, {
1320                 'info_dict': {
1321                     'id': '6N2fdlP3C5U',
1322                     'ext': 'mp4',
1323                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1324                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1325                     'duration': 10990,
1326                     'upload_date': '20161111',
1327                     'uploader': 'Team PGP',
1328                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1329                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1330                 },
1331             }],
1332             'params': {
1333                 'skip_download': True,
1334             },
1335         },
1336         {
1337             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1338             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1339             'info_dict': {
1340                 'id': 'gVfLd0zydlo',
1341                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1342             },
1343             'playlist_count': 2,
1344             'skip': 'Not multifeed anymore',
1345         },
1346         {
1347             'url': 'https://vid.plus/FlRa-iH7PGw',
1348             'only_matching': True,
1349         },
1350         {
1351             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1352             'only_matching': True,
1353         },
1354         {
1355             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1356             # Also tests cut-off URL expansion in video description (see
1357             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1358             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1359             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1360             'info_dict': {
1361                 'id': 'lsguqyKfVQg',
1362                 'ext': 'mp4',
1363                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1364                 'alt_title': 'Dark Walk',
1365                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1366                 'duration': 133,
1367                 'upload_date': '20151119',
1368                 'uploader_id': 'IronSoulElf',
1369                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1370                 'uploader': 'IronSoulElf',
1371                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1372                 'track': 'Dark Walk',
1373                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1374                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1375             },
1376             'params': {
1377                 'skip_download': True,
1378             },
1379         },
1380         {
1381             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1382             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1383             'only_matching': True,
1384         },
1385         {
1386             # Video with yt:stretch=17:0
1387             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1388             'info_dict': {
1389                 'id': 'Q39EVAstoRM',
1390                 'ext': 'mp4',
1391                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1392                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1393                 'upload_date': '20151107',
1394                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1395                 'uploader': 'CH GAMER DROID',
1396             },
1397             'params': {
1398                 'skip_download': True,
1399             },
1400             'skip': 'This video does not exist.',
1401         },
1402         {
1403             # Video with incomplete 'yt:stretch=16:'
1404             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1405             'only_matching': True,
1406         },
1407         {
1408             # Video licensed under Creative Commons
1409             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1410             'info_dict': {
1411                 'id': 'M4gD1WSo5mA',
1412                 'ext': 'mp4',
1413                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1414                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1415                 'duration': 721,
1416                 'upload_date': '20150127',
1417                 'uploader_id': 'BerkmanCenter',
1418                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1419                 'uploader': 'The Berkman Klein Center for Internet & Society',
1420                 'license': 'Creative Commons Attribution license (reuse allowed)',
1421             },
1422             'params': {
1423                 'skip_download': True,
1424             },
1425         },
1426         {
1427             # Channel-like uploader_url
1428             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1429             'info_dict': {
1430                 'id': 'eQcmzGIKrzg',
1431                 'ext': 'mp4',
1432                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1433                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1434                 'duration': 4060,
1435                 'upload_date': '20151119',
1436                 'uploader': 'Bernie Sanders',
1437                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1438                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1439                 'license': 'Creative Commons Attribution license (reuse allowed)',
1440             },
1441             'params': {
1442                 'skip_download': True,
1443             },
1444         },
1445         {
1446             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1447             'only_matching': True,
1448         },
1449         {
1450             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1451             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1452             'only_matching': True,
1453         },
1454         {
1455             # Rental video preview
1456             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1457             'info_dict': {
1458                 'id': 'uGpuVWrhIzE',
1459                 'ext': 'mp4',
1460                 'title': 'Piku - Trailer',
1461                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1462                 'upload_date': '20150811',
1463                 'uploader': 'FlixMatrix',
1464                 'uploader_id': 'FlixMatrixKaravan',
1465                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1466                 'license': 'Standard YouTube License',
1467             },
1468             'params': {
1469                 'skip_download': True,
1470             },
1471             'skip': 'This video is not available.',
1472         },
1473         {
1474             # YouTube Red video with episode data
1475             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1476             'info_dict': {
1477                 'id': 'iqKdEhx-dD4',
1478                 'ext': 'mp4',
1479                 'title': 'Isolation - Mind Field (Ep 1)',
1480                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1481                 'duration': 2085,
1482                 'upload_date': '20170118',
1483                 'uploader': 'Vsauce',
1484                 'uploader_id': 'Vsauce',
1485                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1486                 'series': 'Mind Field',
1487                 'season_number': 1,
1488                 'episode_number': 1,
1489             },
1490             'params': {
1491                 'skip_download': True,
1492             },
1493             'expected_warnings': [
1494                 'Skipping DASH manifest',
1495             ],
1496         },
1497         {
1498             # The following content has been identified by the YouTube community
1499             # as inappropriate or offensive to some audiences.
1500             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1501             'info_dict': {
1502                 'id': '6SJNVb0GnPI',
1503                 'ext': 'mp4',
1504                 'title': 'Race Differences in Intelligence',
1505                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1506                 'duration': 965,
1507                 'upload_date': '20140124',
1508                 'uploader': 'New Century Foundation',
1509                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1510                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1511             },
1512             'params': {
1513                 'skip_download': True,
1514             },
1515             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1516         },
1517         {
1518             # itag 212
1519             'url': '1t24XAntNCY',
1520             'only_matching': True,
1521         },
1522         {
1523             # geo restricted to JP
1524             'url': 'sJL6WA-aGkQ',
1525             'only_matching': True,
1526         },
1527         {
1528             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1529             'only_matching': True,
1530         },
1531         {
1532             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1533             'only_matching': True,
1534         },
1535         {
1536             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1537             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1538             'only_matching': True,
1539         },
1540         {
1541             # DRM protected
1542             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1543             'only_matching': True,
1544         },
1545         {
1546             # Video with unsupported adaptive stream type formats
1547             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1548             'info_dict': {
1549                 'id': 'Z4Vy8R84T1U',
1550                 'ext': 'mp4',
1551                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1552                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1553                 'duration': 433,
1554                 'upload_date': '20130923',
1555                 'uploader': 'Amelia Putri Harwita',
1556                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1557                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1558                 'formats': 'maxcount:10',
1559             },
1560             'params': {
1561                 'skip_download': True,
1562                 'youtube_include_dash_manifest': False,
1563             },
1564             'skip': 'not actual anymore',
1565         },
1566         {
1567             # Youtube Music Auto-generated description
1568             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1569             'info_dict': {
1570                 'id': 'MgNrAu2pzNs',
1571                 'ext': 'mp4',
1572                 'title': 'Voyeur Girl',
1573                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1574                 'upload_date': '20190312',
1575                 'uploader': 'Stephen - Topic',
1576                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1577                 'artist': 'Stephen',
1578                 'track': 'Voyeur Girl',
1579                 'album': 'it\'s too much love to know my dear',
1580                 'release_date': '20190313',
1581                 'release_year': 2019,
1582             },
1583             'params': {
1584                 'skip_download': True,
1585             },
1586         },
1587         {
1588             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1589             'only_matching': True,
1590         },
1591         {
1592             # invalid -> valid video id redirection
1593             'url': 'DJztXj2GPfl',
1594             'info_dict': {
1595                 'id': 'DJztXj2GPfk',
1596                 'ext': 'mp4',
1597                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1598                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1599                 'upload_date': '20090125',
1600                 'uploader': 'Prochorowka',
1601                 'uploader_id': 'Prochorowka',
1602                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1603                 'artist': 'Panjabi MC',
1604                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1605                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1606             },
1607             'params': {
1608                 'skip_download': True,
1609             },
1610             'skip': 'Video unavailable',
1611         },
1612         {
1613             # empty description results in an empty string
1614             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1615             'info_dict': {
1616                 'id': 'x41yOUIvK2k',
1617                 'ext': 'mp4',
1618                 'title': 'IMG 3456',
1619                 'description': '',
1620                 'upload_date': '20170613',
1621                 'uploader_id': 'ElevageOrVert',
1622                 'uploader': 'ElevageOrVert',
1623             },
1624             'params': {
1625                 'skip_download': True,
1626             },
1627         },
1628         {
1629             # with '};' inside yt initial data (see [1])
1630             # see [2] for an example with '};' inside ytInitialPlayerResponse
1631             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1632             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1633             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1634             'info_dict': {
1635                 'id': 'CHqg6qOn4no',
1636                 'ext': 'mp4',
1637                 'title': 'Part 77   Sort a list of simple types in c#',
1638                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1639                 'upload_date': '20130831',
1640                 'uploader_id': 'kudvenkat',
1641                 'uploader': 'kudvenkat',
1642             },
1643             'params': {
1644                 'skip_download': True,
1645             },
1646         },
1647         {
1648             # another example of '};' in ytInitialData
1649             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1650             'only_matching': True,
1651         },
1652         {
1653             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1654             'only_matching': True,
1655         },
1656         {
1657             # https://github.com/ytdl-org/youtube-dl/pull/28094
1658             'url': 'OtqTfy26tG0',
1659             'info_dict': {
1660                 'id': 'OtqTfy26tG0',
1661                 'ext': 'mp4',
1662                 'title': 'Burn Out',
1663                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1664                 'upload_date': '20141120',
1665                 'uploader': 'The Cinematic Orchestra - Topic',
1666                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1667                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1668                 'artist': 'The Cinematic Orchestra',
1669                 'track': 'Burn Out',
1670                 'album': 'Every Day',
1671                 'release_data': None,
1672                 'release_year': None,
1673             },
1674             'params': {
1675                 'skip_download': True,
1676             },
1677         },
1678         {
1679             # controversial video, only works with bpctr when authenticated with cookies
1680             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1681             'only_matching': True,
1682         },
1683         {
1684             # controversial video, requires bpctr/contentCheckOk
1685             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1686             'info_dict': {
1687                 'id': 'SZJvDhaSDnc',
1688                 'ext': 'mp4',
1689                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1690                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1691                 'uploader': 'CBS This Morning',
1692                 'uploader_id': 'CBSThisMorning',
1693                 'upload_date': '20140716',
1694                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1695             }
1696         },
1697         {
1698             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1699             'url': 'cBvYw8_A0vQ',
1700             'info_dict': {
1701                 'id': 'cBvYw8_A0vQ',
1702                 'ext': 'mp4',
1703                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1704                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1705                 'upload_date': '20201120',
1706                 'uploader': 'Walk around Japan',
1707                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1708                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1709             },
1710             'params': {
1711                 'skip_download': True,
1712             },
1713         }, {
1714             # Has multiple audio streams
1715             'url': 'WaOKSUlf4TM',
1716             'only_matching': True
1717         }, {
1718             # Requires Premium: has format 141 when requested using YTM url
1719             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1720             'only_matching': True
1721         }, {
1722             # multiple subtitles with same lang_code
1723             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1724             'only_matching': True,
1725         }, {
1726             # Force use android client fallback
1727             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1728             'info_dict': {
1729                 'id': 'YOelRv7fMxY',
1730                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1731                 'ext': '3gp',
1732                 'upload_date': '20210624',
1733                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1734                 'uploader': 'colinfurze',
1735                 'uploader_id': 'colinfurze',
1736                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1737                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1738             },
1739             'params': {
1740                 'format': '17',  # 3gp format available on android
1741                 'extractor_args': {'youtube': {'player_client': ['android']}},
1742             },
1743         },
1744         {
1745             # Skip download of additional client configs (remix client config in this case)
1746             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1747             'only_matching': True,
1748             'params': {
1749                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1750             },
1751         }
1752     ]
1753
1754     @classmethod
1755     def suitable(cls, url):
1756         # Hack for lazy extractors until more generic solution is implemented
1757         # (see #28780)
1758         from .youtube import parse_qs
1759         qs = parse_qs(url)
1760         if qs.get('list', [None])[0]:
1761             return False
1762         return super(YoutubeIE, cls).suitable(url)
1763
1764     def __init__(self, *args, **kwargs):
1765         super(YoutubeIE, self).__init__(*args, **kwargs)
1766         self._code_cache = {}
1767         self._player_cache = {}
1768
1769     def _extract_player_url(self, ytcfg=None, webpage=None):
1770         player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1771         if not player_url and webpage:
1772             player_url = self._search_regex(
1773                 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1774                 webpage, 'player URL', fatal=False)
1775         if not player_url:
1776             return None
1777         if player_url.startswith('//'):
1778             player_url = 'https:' + player_url
1779         elif not re.match(r'https?://', player_url):
1780             player_url = compat_urlparse.urljoin(
1781                 'https://www.youtube.com', player_url)
1782         return player_url
1783
1784     def _signature_cache_id(self, example_sig):
1785         """ Return a string representation of a signature """
1786         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1787
1788     @classmethod
1789     def _extract_player_info(cls, player_url):
1790         for player_re in cls._PLAYER_INFO_RE:
1791             id_m = re.search(player_re, player_url)
1792             if id_m:
1793                 break
1794         else:
1795             raise ExtractorError('Cannot identify player %r' % player_url)
1796         return id_m.group('id')
1797
1798     def _load_player(self, video_id, player_url, fatal=True) -> bool:
1799         player_id = self._extract_player_info(player_url)
1800         if player_id not in self._code_cache:
1801             self._code_cache[player_id] = self._download_webpage(
1802                 player_url, video_id, fatal=fatal,
1803                 note='Downloading player ' + player_id,
1804                 errnote='Download of %s failed' % player_url)
1805         return player_id in self._code_cache
1806
1807     def _extract_signature_function(self, video_id, player_url, example_sig):
1808         player_id = self._extract_player_info(player_url)
1809
1810         # Read from filesystem cache
1811         func_id = 'js_%s_%s' % (
1812             player_id, self._signature_cache_id(example_sig))
1813         assert os.path.basename(func_id) == func_id
1814
1815         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1816         if cache_spec is not None:
1817             return lambda s: ''.join(s[i] for i in cache_spec)
1818
1819         if self._load_player(video_id, player_url):
1820             code = self._code_cache[player_id]
1821             res = self._parse_sig_js(code)
1822
1823             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1824             cache_res = res(test_string)
1825             cache_spec = [ord(c) for c in cache_res]
1826
1827             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1828             return res
1829
1830     def _print_sig_code(self, func, example_sig):
1831         def gen_sig_code(idxs):
1832             def _genslice(start, end, step):
1833                 starts = '' if start == 0 else str(start)
1834                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1835                 steps = '' if step == 1 else (':%d' % step)
1836                 return 's[%s%s%s]' % (starts, ends, steps)
1837
1838             step = None
1839             # Quelch pyflakes warnings - start will be set when step is set
1840             start = '(Never used)'
1841             for i, prev in zip(idxs[1:], idxs[:-1]):
1842                 if step is not None:
1843                     if i - prev == step:
1844                         continue
1845                     yield _genslice(start, prev, step)
1846                     step = None
1847                     continue
1848                 if i - prev in [-1, 1]:
1849                     step = i - prev
1850                     start = prev
1851                     continue
1852                 else:
1853                     yield 's[%d]' % prev
1854             if step is None:
1855                 yield 's[%d]' % i
1856             else:
1857                 yield _genslice(start, i, step)
1858
1859         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1860         cache_res = func(test_string)
1861         cache_spec = [ord(c) for c in cache_res]
1862         expr_code = ' + '.join(gen_sig_code(cache_spec))
1863         signature_id_tuple = '(%s)' % (
1864             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1865         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1866                 '    return %s\n') % (signature_id_tuple, expr_code)
1867         self.to_screen('Extracted signature function:\n' + code)
1868
1869     def _parse_sig_js(self, jscode):
1870         funcname = self._search_regex(
1871             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1872              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1873              r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1874              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1875              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1876              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1877              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1878              # Obsolete patterns
1879              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1880              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1881              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1882              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1883              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1884              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1885              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1886              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1887             jscode, 'Initial JS player signature function name', group='sig')
1888
1889         jsi = JSInterpreter(jscode)
1890         initial_function = jsi.extract_function(funcname)
1891         return lambda s: initial_function([s])
1892
1893     def _decrypt_signature(self, s, video_id, player_url):
1894         """Turn the encrypted s field into a working signature"""
1895
1896         if player_url is None:
1897             raise ExtractorError('Cannot decrypt signature without player_url')
1898
1899         try:
1900             player_id = (player_url, self._signature_cache_id(s))
1901             if player_id not in self._player_cache:
1902                 func = self._extract_signature_function(
1903                     video_id, player_url, s
1904                 )
1905                 self._player_cache[player_id] = func
1906             func = self._player_cache[player_id]
1907             if self.get_param('youtube_print_sig_code'):
1908                 self._print_sig_code(func, s)
1909             return func(s)
1910         except Exception as e:
1911             tb = traceback.format_exc()
1912             raise ExtractorError(
1913                 'Signature extraction failed: ' + tb, cause=e)
1914
1915     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1916         """
1917         Extract signatureTimestamp (sts)
1918         Required to tell API what sig/player version is in use.
1919         """
1920         sts = None
1921         if isinstance(ytcfg, dict):
1922             sts = int_or_none(ytcfg.get('STS'))
1923
1924         if not sts:
1925             # Attempt to extract from player
1926             if player_url is None:
1927                 error_msg = 'Cannot extract signature timestamp without player_url.'
1928                 if fatal:
1929                     raise ExtractorError(error_msg)
1930                 self.report_warning(error_msg)
1931                 return
1932             if self._load_player(video_id, player_url, fatal=fatal):
1933                 player_id = self._extract_player_info(player_url)
1934                 code = self._code_cache[player_id]
1935                 sts = int_or_none(self._search_regex(
1936                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1937                     'JS player signature timestamp', group='sts', fatal=fatal))
1938         return sts
1939
1940     def _mark_watched(self, video_id, player_responses):
1941         playback_url = traverse_obj(
1942             player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1943             expected_type=url_or_none, get_all=False)
1944         if not playback_url:
1945             self.report_warning('Unable to mark watched')
1946             return
1947         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1948         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1949
1950         # cpn generation algorithm is reverse engineered from base.js.
1951         # In fact it works even with dummy cpn.
1952         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1953         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1954
1955         qs.update({
1956             'ver': ['2'],
1957             'cpn': [cpn],
1958         })
1959         playback_url = compat_urlparse.urlunparse(
1960             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1961
1962         self._download_webpage(
1963             playback_url, video_id, 'Marking watched',
1964             'Unable to mark watched', fatal=False)
1965
1966     @staticmethod
1967     def _extract_urls(webpage):
1968         # Embedded YouTube player
1969         entries = [
1970             unescapeHTML(mobj.group('url'))
1971             for mobj in re.finditer(r'''(?x)
1972             (?:
1973                 <iframe[^>]+?src=|
1974                 data-video-url=|
1975                 <embed[^>]+?src=|
1976                 embedSWF\(?:\s*|
1977                 <object[^>]+data=|
1978                 new\s+SWFObject\(
1979             )
1980             (["\'])
1981                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1982                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1983             \1''', webpage)]
1984
1985         # lazyYT YouTube embed
1986         entries.extend(list(map(
1987             unescapeHTML,
1988             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1989
1990         # Wordpress "YouTube Video Importer" plugin
1991         matches = re.findall(r'''(?x)<div[^>]+
1992             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1993             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1994         entries.extend(m[-1] for m in matches)
1995
1996         return entries
1997
1998     @staticmethod
1999     def _extract_url(webpage):
2000         urls = YoutubeIE._extract_urls(webpage)
2001         return urls[0] if urls else None
2002
2003     @classmethod
2004     def extract_id(cls, url):
2005         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2006         if mobj is None:
2007             raise ExtractorError('Invalid URL: %s' % url)
2008         video_id = mobj.group(2)
2009         return video_id
2010
2011     def _extract_chapters_from_json(self, data, duration):
2012         chapter_list = traverse_obj(
2013             data, (
2014                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2015                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2016             ), expected_type=list)
2017
2018         return self._extract_chapters(
2019             chapter_list,
2020             chapter_time=lambda chapter: float_or_none(
2021                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2022             chapter_title=lambda chapter: traverse_obj(
2023                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2024             duration=duration)
2025
2026     def _extract_chapters_from_engagement_panel(self, data, duration):
2027         content_list = traverse_obj(
2028             data,
2029             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2030             expected_type=list, default=[])
2031         chapter_time = lambda chapter: parse_duration(self._get_text(chapter.get('timeDescription')))
2032         chapter_title = lambda chapter: self._get_text(chapter.get('title'))
2033
2034         return next((
2035             filter(None, (
2036                 self._extract_chapters(
2037                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2038                     chapter_time, chapter_title, duration)
2039                 for contents in content_list
2040             ))), [])
2041
2042     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2043         chapters = []
2044         last_chapter = {'start_time': 0}
2045         for idx, chapter in enumerate(chapter_list or []):
2046             title = chapter_title(chapter)
2047             start_time = chapter_time(chapter)
2048             if start_time is None:
2049                 continue
2050             last_chapter['end_time'] = start_time
2051             if start_time < last_chapter['start_time']:
2052                 if idx == 1:
2053                     chapters.pop()
2054                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2055                 else:
2056                     self.report_warning(f'Invalid start time for chapter "{title}"')
2057                     continue
2058             last_chapter = {'start_time': start_time, 'title': title}
2059             chapters.append(last_chapter)
2060         last_chapter['end_time'] = duration
2061         return chapters
2062
2063     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2064         return self._parse_json(self._search_regex(
2065             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2066              regex), webpage, name, default='{}'), video_id, fatal=False)
2067
2068     @staticmethod
2069     def parse_time_text(time_text):
2070         """
2071         Parse the comment time text
2072         time_text is in the format 'X units ago (edited)'
2073         """
2074         time_text_split = time_text.split(' ')
2075         if len(time_text_split) >= 3:
2076             try:
2077                 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2078             except ValueError:
2079                 return None
2080
2081     def _extract_comment(self, comment_renderer, parent=None):
2082         comment_id = comment_renderer.get('commentId')
2083         if not comment_id:
2084             return
2085
2086         text = self._get_text(comment_renderer.get('contentText'))
2087
2088         # note: timestamp is an estimate calculated from the current time and time_text
2089         time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
2090         time_text_dt = self.parse_time_text(time_text)
2091         if isinstance(time_text_dt, datetime.datetime):
2092             timestamp = calendar.timegm(time_text_dt.timetuple())
2093         author = self._get_text(comment_renderer.get('authorText'))
2094         author_id = try_get(comment_renderer,
2095                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2096
2097         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2098                                                        lambda x: x['likeCount']), compat_str)) or 0
2099         author_thumbnail = try_get(comment_renderer,
2100                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2101
2102         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2103         is_favorited = 'creatorHeart' in (try_get(
2104             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2105         return {
2106             'id': comment_id,
2107             'text': text,
2108             'timestamp': timestamp,
2109             'time_text': time_text,
2110             'like_count': votes,
2111             'is_favorited': is_favorited,
2112             'author': author,
2113             'author_id': author_id,
2114             'author_thumbnail': author_thumbnail,
2115             'author_is_uploader': author_is_uploader,
2116             'parent': parent or 'root'
2117         }
2118
2119     def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2120                          ytcfg, video_id, parent=None, comment_counts=None):
2121
2122         def extract_header(contents):
2123             _total_comments = 0
2124             _continuation = None
2125             for content in contents:
2126                 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2127                 expected_comment_count = parse_count(self._get_text(
2128                     comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))
2129
2130                 if expected_comment_count:
2131                     comment_counts[1] = expected_comment_count
2132                     self.to_screen('Downloading ~%d comments' % expected_comment_count)
2133                     _total_comments = comment_counts[1]
2134                 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2135                 comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top
2136
2137                 sort_menu_item = try_get(
2138                     comments_header_renderer,
2139                     lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2140                 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2141
2142                 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2143                 if not _continuation:
2144                     continue
2145
2146                 sort_text = sort_menu_item.get('title')
2147                 if isinstance(sort_text, compat_str):
2148                     sort_text = sort_text.lower()
2149                 else:
2150                     sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2151                 self.to_screen('Sorting comments by %s' % sort_text)
2152                 break
2153             return _total_comments, _continuation
2154
2155         def extract_thread(contents):
2156             if not parent:
2157                 comment_counts[2] = 0
2158             for content in contents:
2159                 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2160                 comment_renderer = try_get(
2161                     comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2162                     content, (lambda x: x['commentRenderer'], dict))
2163
2164                 if not comment_renderer:
2165                     continue
2166                 comment = self._extract_comment(comment_renderer, parent)
2167                 if not comment:
2168                     continue
2169                 comment_counts[0] += 1
2170                 yield comment
2171                 # Attempt to get the replies
2172                 comment_replies_renderer = try_get(
2173                     comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2174
2175                 if comment_replies_renderer:
2176                     comment_counts[2] += 1
2177                     comment_entries_iter = self._comment_entries(
2178                         comment_replies_renderer, identity_token, account_syncid, ytcfg,
2179                         video_id, parent=comment.get('id'), comment_counts=comment_counts)
2180
2181                     for reply_comment in comment_entries_iter:
2182                         yield reply_comment
2183
2184         # YouTube comments have a max depth of 2
2185         max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2186         if max_depth == 1 and parent:
2187             return
2188         if not comment_counts:
2189             # comment so far, est. total comments, current comment thread #
2190             comment_counts = [0, 0, 0]
2191
2192         continuation = self._extract_continuation(root_continuation_data)
2193         if continuation and len(continuation['continuation']) < 27:
2194             self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2195             continuation_token = self._generate_comment_continuation(video_id)
2196             continuation = self._build_api_continuation_query(continuation_token, None)
2197
2198         visitor_data = None
2199         is_first_continuation = parent is None
2200
2201         for page_num in itertools.count(0):
2202             if not continuation:
2203                 break
2204             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2205             comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2206             if page_num == 0:
2207                 if is_first_continuation:
2208                     note_prefix = 'Downloading comment section API JSON'
2209                 else:
2210                     note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
2211                         comment_counts[2], comment_prog_str)
2212             else:
2213                 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2214                     '       ' if parent else '', ' replies' if parent else '',
2215                     page_num, comment_prog_str)
2216
2217             response = self._extract_response(
2218                 item_id=None, query=continuation,
2219                 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2220                 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2221             if not response:
2222                 break
2223             visitor_data = try_get(
2224                 response,
2225                 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2226                 compat_str) or visitor_data
2227
2228             continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2229
2230             continuation = None
2231             if isinstance(continuation_contents, list):
2232                 for continuation_section in continuation_contents:
2233                     if not isinstance(continuation_section, dict):
2234                         continue
2235                     continuation_items = try_get(
2236                         continuation_section,
2237                         (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2238                          lambda x: x['appendContinuationItemsAction']['continuationItems']),
2239                         list) or []
2240                     if is_first_continuation:
2241                         total_comments, continuation = extract_header(continuation_items)
2242                         if total_comments:
2243                             yield total_comments
2244                         is_first_continuation = False
2245                         if continuation:
2246                             break
2247                         continue
2248                     count = 0
2249                     for count, entry in enumerate(extract_thread(continuation_items)):
2250                         yield entry
2251                     continuation = self._extract_continuation({'contents': continuation_items})
2252                     if continuation:
2253                         # Sometimes YouTube provides a continuation without any comments
2254                         # In most cases we end up just downloading these with very little comments to come.
2255                         if count == 0:
2256                             if not parent:
2257                                 self.report_warning('No comments received - assuming end of comments')
2258                             continuation = None
2259                         break
2260
2261             # Deprecated response structure
2262             elif isinstance(continuation_contents, dict):
2263                 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2264                 for key, continuation_renderer in continuation_contents.items():
2265                     if key not in known_continuation_renderers:
2266                         continue
2267                     if not isinstance(continuation_renderer, dict):
2268                         continue
2269                     if is_first_continuation:
2270                         header_continuation_items = [continuation_renderer.get('header') or {}]
2271                         total_comments, continuation = extract_header(header_continuation_items)
2272                         if total_comments:
2273                             yield total_comments
2274                         is_first_continuation = False
2275                         if continuation:
2276                             break
2277
2278                     # Sometimes YouTube provides a continuation without any comments
2279                     # In most cases we end up just downloading these with very little comments to come.
2280                     count = 0
2281                     for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2282                         yield entry
2283                     continuation = self._extract_continuation(continuation_renderer)
2284                     if count == 0:
2285                         if not parent:
2286                             self.report_warning('No comments received - assuming end of comments')
2287                         continuation = None
2288                     break
2289
2290     @staticmethod
2291     def _generate_comment_continuation(video_id):
2292         """
2293         Generates initial comment section continuation token from given video id
2294         """
2295         b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2296         parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2297         new_continuation_intlist = list(itertools.chain.from_iterable(
2298             [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2299         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2300
2301     def _extract_comments(self, ytcfg, video_id, contents, webpage):
2302         """Entry for comment extraction"""
2303         def _real_comment_extract(contents):
2304             if isinstance(contents, list):
2305                 for entry in contents:
2306                     for key, renderer in entry.items():
2307                         if key not in known_entry_comment_renderers:
2308                             continue
2309                         yield from self._comment_entries(
2310                             renderer, video_id=video_id, ytcfg=ytcfg,
2311                             identity_token=self._extract_identity_token(webpage, item_id=video_id),
2312                             account_syncid=self._extract_account_syncid(ytcfg))
2313                         break
2314         comments = []
2315         known_entry_comment_renderers = ('itemSectionRenderer',)
2316         estimated_total = 0
2317         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2318
2319         try:
2320             for comment in _real_comment_extract(contents):
2321                 if len(comments) >= max_comments:
2322                     break
2323                 if isinstance(comment, int):
2324                     estimated_total = comment
2325                     continue
2326                 comments.append(comment)
2327         except KeyboardInterrupt:
2328             self.to_screen('Interrupted by user')
2329         self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2330         return {
2331             'comments': comments,
2332             'comment_count': len(comments),
2333         }
2334
2335     @staticmethod
2336     def _generate_player_context(sts=None):
2337         context = {
2338             'html5Preference': 'HTML5_PREF_WANTS',
2339         }
2340         if sts is not None:
2341             context['signatureTimestamp'] = sts
2342         return {
2343             'playbackContext': {
2344                 'contentPlaybackContext': context
2345             },
2346             'contentCheckOk': True,
2347             'racyCheckOk': True
2348         }
2349
2350     @staticmethod
2351     def _get_video_info_params(video_id, client='TVHTML5'):
2352         GVI_CLIENTS = {
2353             'ANDROID': {
2354                 'c': 'ANDROID',
2355                 'cver': '16.20',
2356             },
2357             'TVHTML5': {
2358                 'c': 'TVHTML5',
2359                 'cver': '6.20180913',
2360             },
2361             'IOS': {
2362                 'c': 'IOS',
2363                 'cver': '16.20'
2364             }
2365         }
2366         query = {
2367             'video_id': video_id,
2368             'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2369             'html5': '1'
2370         }
2371         query.update(GVI_CLIENTS.get(client))
2372         return query
2373
2374     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2375
2376         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2377         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2378         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2379         headers = self.generate_api_headers(
2380             player_ytcfg, identity_token, syncid,
2381             default_client=self._YT_CLIENTS[client], session_index=session_index)
2382
2383         yt_query = {'videoId': video_id}
2384         yt_query.update(self._generate_player_context(sts))
2385         return self._extract_response(
2386             item_id=video_id, ep='player', query=yt_query,
2387             ytcfg=player_ytcfg, headers=headers, fatal=False,
2388             default_client=self._YT_CLIENTS[client],
2389             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2390         ) or None
2391
2392     def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
2393         # get_video_info endpoint seems to be completely dead
2394         gvi_client = None # self._YT_CLIENTS.get(f'_{client}_agegate')
2395         if gvi_client:
2396             pr = self._parse_json(traverse_obj(
2397                 compat_parse_qs(self._download_webpage(
2398                     self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
2399                     'Refetching age-gated %s info webpage' % gvi_client.lower(),
2400                     'unable to download video info webpage', fatal=False,
2401                     query=self._get_video_info_params(video_id, client=gvi_client))),
2402                 ('player_response', 0), expected_type=str) or '{}', video_id)
2403             if pr:
2404                 return pr
2405             self.report_warning('Falling back to embedded-only age-gate workaround')
2406
2407         if not self._YT_CLIENTS.get(f'_{client}_embedded'):
2408             return
2409         embed_webpage = None
2410         if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
2411             embed_webpage = self._download_webpage(
2412                 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2413                 video_id=video_id, note=f'Downloading age-gated {client} embed config')
2414
2415         ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
2416         # If we extracted the embed webpage, it'll tell us if we can view the video
2417         embedded_pr = self._parse_json(
2418             traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
2419             video_id=video_id)
2420         embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
2421         if embedded_ps_reason in self._AGE_GATE_REASONS:
2422             return
2423         return self._extract_player_response(
2424             f'_{client}_embedded', video_id,
2425             ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
2426             identity_token, player_url, initial_pr)
2427
2428     def _get_requested_clients(self, url, smuggled_data):
2429         requested_clients = [client for client in self._configuration_arg('player_client')
2430                              if client[:0] != '_' and client in self._YT_CLIENTS]
2431         if not requested_clients:
2432             requested_clients = ['android', 'web']
2433
2434         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2435             requested_clients.extend(
2436                 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2437
2438         return orderedSet(requested_clients)
2439
2440     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
2441         initial_pr = None
2442         if webpage:
2443             initial_pr = self._extract_yt_initial_variable(
2444                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2445                 video_id, 'initial player response')
2446
2447         for client in clients:
2448             player_ytcfg = master_ytcfg if client == 'web' else {}
2449             if client == 'web' and initial_pr:
2450                 pr = initial_pr
2451             else:
2452                 if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
2453                     ytm_webpage = self._download_webpage(
2454                         'https://music.youtube.com',
2455                         video_id, fatal=False, note='Downloading remix client config')
2456                     player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
2457                 pr = self._extract_player_response(
2458                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
2459             if pr:
2460                 yield pr
2461             if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
2462                 pr = self._extract_age_gated_player_response(
2463                     client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
2464                 if pr:
2465                     yield pr
2466         # Android player_response does not have microFormats which are needed for
2467         # extraction of some data. So we return the initial_pr with formats
2468         # stripped out even if not requested by the user
2469         # See: https://github.com/yt-dlp/yt-dlp/issues/501
2470         if initial_pr and 'web' not in clients:
2471             initial_pr['streamingData'] = None
2472             yield initial_pr
2473
2474     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2475         itags, stream_ids = [], []
2476         itag_qualities = {}
2477         q = qualities([
2478             # "tiny" is the smallest video-only format. But some audio-only formats
2479             # was also labeled "tiny". It is not clear if such formats still exist
2480             'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2481             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2482         ])
2483         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2484
2485         for fmt in streaming_formats:
2486             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2487                 continue
2488
2489             itag = str_or_none(fmt.get('itag'))
2490             audio_track = fmt.get('audioTrack') or {}
2491             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2492             if stream_id in stream_ids:
2493                 continue
2494
2495             quality = fmt.get('quality')
2496             if quality == 'tiny' or not quality:
2497                 quality = fmt.get('audioQuality', '').lower() or quality
2498             if itag and quality:
2499                 itag_qualities[itag] = quality
2500             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2501             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2502             # number of fragment that would subsequently requested with (`&sq=N`)
2503             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2504                 continue
2505
2506             fmt_url = fmt.get('url')
2507             if not fmt_url:
2508                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2509                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2510                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2511                 if not (sc and fmt_url and encrypted_sig):
2512                     continue
2513                 if not player_url:
2514                     continue
2515                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2516                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2517                 fmt_url += '&' + sp + '=' + signature
2518
2519             if itag:
2520                 itags.append(itag)
2521                 stream_ids.append(stream_id)
2522
2523             tbr = float_or_none(
2524                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2525             dct = {
2526                 'asr': int_or_none(fmt.get('audioSampleRate')),
2527                 'filesize': int_or_none(fmt.get('contentLength')),
2528                 'format_id': itag,
2529                 'format_note': ', '.join(filter(None, (
2530                     audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
2531                 'fps': int_or_none(fmt.get('fps')),
2532                 'height': int_or_none(fmt.get('height')),
2533                 'quality': q(quality),
2534                 'tbr': tbr,
2535                 'url': fmt_url,
2536                 'width': fmt.get('width'),
2537                 'language': audio_track.get('id', '').split('.')[0],
2538             }
2539             mime_mobj = re.match(
2540                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2541             if mime_mobj:
2542                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2543                 dct.update(parse_codecs(mime_mobj.group(2)))
2544                 # The 3gp format in android client has a quality of "small",
2545                 # but is actually worse than all other formats
2546                 if dct['ext'] == '3gp':
2547                     dct['quality'] = q('tiny')
2548                     dct['preference'] = -10
2549             no_audio = dct.get('acodec') == 'none'
2550             no_video = dct.get('vcodec') == 'none'
2551             if no_audio:
2552                 dct['vbr'] = tbr
2553             if no_video:
2554                 dct['abr'] = tbr
2555             if no_audio or no_video:
2556                 dct['downloader_options'] = {
2557                     # Youtube throttles chunks >~10M
2558                     'http_chunk_size': 10485760,
2559                 }
2560                 if dct.get('ext'):
2561                     dct['container'] = dct['ext'] + '_dash'
2562             yield dct
2563
2564         skip_manifests = self._configuration_arg('skip')
2565         get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2566         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2567
2568         for sd in streaming_data:
2569             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2570             if hls_manifest_url:
2571                 for f in self._extract_m3u8_formats(
2572                         hls_manifest_url, video_id, 'mp4', fatal=False):
2573                     itag = self._search_regex(
2574                         r'/itag/(\d+)', f['url'], 'itag', default=None)
2575                     if itag in itags:
2576                         continue
2577                     if itag:
2578                         f['format_id'] = itag
2579                         itags.append(itag)
2580                     yield f
2581
2582             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2583             if dash_manifest_url:
2584                 for f in self._extract_mpd_formats(
2585                         dash_manifest_url, video_id, fatal=False):
2586                     itag = f['format_id']
2587                     if itag in itags:
2588                         continue
2589                     if itag:
2590                         itags.append(itag)
2591                     if itag in itag_qualities:
2592                         f['quality'] = q(itag_qualities[itag])
2593                     filesize = int_or_none(self._search_regex(
2594                         r'/clen/(\d+)', f.get('fragment_base_url')
2595                         or f['url'], 'file size', default=None))
2596                     if filesize:
2597                         f['filesize'] = filesize
2598                     yield f
2599
2600     def _real_extract(self, url):
2601         url, smuggled_data = unsmuggle_url(url, {})
2602         video_id = self._match_id(url)
2603
2604         base_url = self.http_scheme() + '//www.youtube.com/'
2605         webpage_url = base_url + 'watch?v=' + video_id
2606         webpage = self._download_webpage(
2607             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2608
2609         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2610         player_url = self._extract_player_url(master_ytcfg, webpage)
2611         identity_token = self._extract_identity_token(webpage, video_id)
2612
2613         player_responses = list(self._extract_player_responses(
2614             self._get_requested_clients(url, smuggled_data),
2615             video_id, webpage, master_ytcfg, player_url, identity_token))
2616
2617         get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2618
2619         playability_statuses = traverse_obj(
2620             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2621
2622         trailer_video_id = get_first(
2623             playability_statuses,
2624             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2625             expected_type=str)
2626         if trailer_video_id:
2627             return self.url_result(
2628                 trailer_video_id, self.ie_key(), trailer_video_id)
2629
2630         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2631                        if webpage else (lambda x: None))
2632
2633         video_details = traverse_obj(
2634             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2635         microformats = traverse_obj(
2636             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2637             expected_type=dict, default=[])
2638         video_title = (
2639             get_first(video_details, 'title')
2640             or self._get_text(microformats, (..., 'title'))
2641             or search_meta(['og:title', 'twitter:title', 'title']))
2642         video_description = get_first(video_details, 'shortDescription')
2643
2644         if not smuggled_data.get('force_singlefeed', False):
2645             if not self.get_param('noplaylist'):
2646                 multifeed_metadata_list = get_first(
2647                     player_responses,
2648                     ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2649                     expected_type=str)
2650                 if multifeed_metadata_list:
2651                     entries = []
2652                     feed_ids = []
2653                     for feed in multifeed_metadata_list.split(','):
2654                         # Unquote should take place before split on comma (,) since textual
2655                         # fields may contain comma as well (see
2656                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
2657                         feed_data = compat_parse_qs(
2658                             compat_urllib_parse_unquote_plus(feed))
2659
2660                         def feed_entry(name):
2661                             return try_get(
2662                                 feed_data, lambda x: x[name][0], compat_str)
2663
2664                         feed_id = feed_entry('id')
2665                         if not feed_id:
2666                             continue
2667                         feed_title = feed_entry('title')
2668                         title = video_title
2669                         if feed_title:
2670                             title += ' (%s)' % feed_title
2671                         entries.append({
2672                             '_type': 'url_transparent',
2673                             'ie_key': 'Youtube',
2674                             'url': smuggle_url(
2675                                 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2676                                 {'force_singlefeed': True}),
2677                             'title': title,
2678                         })
2679                         feed_ids.append(feed_id)
2680                     self.to_screen(
2681                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2682                         % (', '.join(feed_ids), video_id))
2683                     return self.playlist_result(
2684                         entries, video_id, video_title, video_description)
2685             else:
2686                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2687
2688         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2689         is_live = get_first(video_details, 'isLive')
2690         if is_live is None:
2691             is_live = get_first(live_broadcast_details, 'isLiveNow')
2692
2693         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2694         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2695
2696         if not formats:
2697             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2698                 self.raise_no_formats(
2699                     'This video is DRM protected.', expected=True)
2700             pemr = get_first(
2701                 playability_statuses,
2702                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2703             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2704             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2705             if subreason:
2706                 if subreason == 'The uploader has not made this video available in your country.':
2707                     countries = get_first(microformats, 'availableCountries')
2708                     if not countries:
2709                         regions_allowed = search_meta('regionsAllowed')
2710                         countries = regions_allowed.split(',') if regions_allowed else None
2711                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2712                 reason += f'. {subreason}'
2713             if reason:
2714                 self.raise_no_formats(reason, expected=True)
2715
2716         for f in formats:
2717             # TODO: detect if throttled
2718             if '&n=' in f['url']:  # possibly throttled
2719                 f['source_preference'] = -10
2720                 # note = f.get('format_note')
2721                 # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2722
2723         self._sort_formats(formats)
2724
2725         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2726         if not keywords and webpage:
2727             keywords = [
2728                 unescapeHTML(m.group('content'))
2729                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2730         for keyword in keywords:
2731             if keyword.startswith('yt:stretch='):
2732                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2733                 if mobj:
2734                     # NB: float is intentional for forcing float division
2735                     w, h = (float(v) for v in mobj.groups())
2736                     if w > 0 and h > 0:
2737                         ratio = w / h
2738                         for f in formats:
2739                             if f.get('vcodec') != 'none':
2740                                 f['stretched_ratio'] = ratio
2741                         break
2742
2743         thumbnails = []
2744         thumbnail_dicts = traverse_obj(
2745             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2746             expected_type=dict, default=[])
2747         for thumbnail in thumbnail_dicts:
2748             thumbnail_url = thumbnail.get('url')
2749             if not thumbnail_url:
2750                 continue
2751             # Sometimes youtube gives a wrong thumbnail URL. See:
2752             # https://github.com/yt-dlp/yt-dlp/issues/233
2753             # https://github.com/ytdl-org/youtube-dl/issues/28023
2754             if 'maxresdefault' in thumbnail_url:
2755                 thumbnail_url = thumbnail_url.split('?')[0]
2756             thumbnails.append({
2757                 'url': thumbnail_url,
2758                 'height': int_or_none(thumbnail.get('height')),
2759                 'width': int_or_none(thumbnail.get('width')),
2760             })
2761         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2762         if thumbnail_url:
2763             thumbnails.append({
2764                 'url': thumbnail_url,
2765             })
2766         # The best resolution thumbnails sometimes does not appear in the webpage
2767         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2768         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2769         hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2770         # TODO: Test them also? - For some videos, even these don't exist
2771         guaranteed_thumbnail_names = [
2772             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2773             'mqdefault', 'mq1', 'mq2', 'mq3',
2774             'default', '1', '2', '3'
2775         ]
2776         thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2777         n_thumbnail_names = len(thumbnail_names)
2778
2779         thumbnails.extend({
2780             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2781                 video_id=video_id, name=name, ext=ext,
2782                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2783             '_test_url': name in hq_thumbnail_names,
2784         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2785         for thumb in thumbnails:
2786             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2787             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2788         self._remove_duplicate_formats(thumbnails)
2789
2790         category = get_first(microformats, 'category') or search_meta('genre')
2791         channel_id = str_or_none(
2792             get_first(video_details, 'channelId')
2793             or get_first(microformats, 'externalChannelId')
2794             or search_meta('channelId'))
2795         duration = int_or_none(
2796             get_first(video_details, 'lengthSeconds')
2797             or get_first(microformats, 'lengthSeconds')
2798             or parse_duration(search_meta('duration'))) or None
2799         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2800
2801         live_content = get_first(video_details, 'isLiveContent')
2802         is_upcoming = get_first(video_details, 'isUpcoming')
2803         if is_live is None:
2804             if is_upcoming or live_content is False:
2805                 is_live = False
2806         if is_upcoming is None and (live_content or is_live):
2807             is_upcoming = False
2808         live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2809         live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2810         if not duration and live_endtime and live_starttime:
2811             duration = live_endtime - live_starttime
2812
2813         info = {
2814             'id': video_id,
2815             'title': self._live_title(video_title) if is_live else video_title,
2816             'formats': formats,
2817             'thumbnails': thumbnails,
2818             'description': video_description,
2819             'upload_date': unified_strdate(
2820                 get_first(microformats, 'uploadDate')
2821                 or search_meta('uploadDate')),
2822             'uploader': get_first(video_details, 'author'),
2823             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2824             'uploader_url': owner_profile_url,
2825             'channel_id': channel_id,
2826             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2827             'duration': duration,
2828             'view_count': int_or_none(
2829                 get_first((video_details, microformats), (..., 'viewCount'))
2830                 or search_meta('interactionCount')),
2831             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2832             'age_limit': 18 if (
2833                 get_first(microformats, 'isFamilySafe') is False
2834                 or search_meta('isFamilyFriendly') == 'false'
2835                 or search_meta('og:restrictions:age') == '18+') else 0,
2836             'webpage_url': webpage_url,
2837             'categories': [category] if category else None,
2838             'tags': keywords,
2839             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2840             'is_live': is_live,
2841             'was_live': (False if is_live or is_upcoming or live_content is False
2842                          else None if is_live is None or is_upcoming is None
2843                          else live_content),
2844             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
2845             'release_timestamp': live_starttime,
2846         }
2847
2848         pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2849         subtitles = {}
2850         if pctr:
2851             def process_language(container, base_url, lang_code, sub_name, query):
2852                 lang_subs = container.setdefault(lang_code, [])
2853                 for fmt in self._SUBTITLE_FORMATS:
2854                     query.update({
2855                         'fmt': fmt,
2856                     })
2857                     lang_subs.append({
2858                         'ext': fmt,
2859                         'url': update_url_query(base_url, query),
2860                         'name': sub_name,
2861                     })
2862
2863             for caption_track in (pctr.get('captionTracks') or []):
2864                 base_url = caption_track.get('baseUrl')
2865                 if not base_url:
2866                     continue
2867                 if caption_track.get('kind') != 'asr':
2868                     lang_code = (
2869                         remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2870                         or caption_track.get('languageCode'))
2871                     if not lang_code:
2872                         continue
2873                     process_language(
2874                         subtitles, base_url, lang_code,
2875                         try_get(caption_track, lambda x: x['name']['simpleText']),
2876                         {})
2877                     continue
2878                 automatic_captions = {}
2879                 for translation_language in (pctr.get('translationLanguages') or []):
2880                     translation_language_code = translation_language.get('languageCode')
2881                     if not translation_language_code:
2882                         continue
2883                     process_language(
2884                         automatic_captions, base_url, translation_language_code,
2885                         self._get_text(translation_language.get('languageName'), max_runs=1),
2886                         {'tlang': translation_language_code})
2887                 info['automatic_captions'] = automatic_captions
2888         info['subtitles'] = subtitles
2889
2890         parsed_url = compat_urllib_parse_urlparse(url)
2891         for component in [parsed_url.fragment, parsed_url.query]:
2892             query = compat_parse_qs(component)
2893             for k, v in query.items():
2894                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2895                     d_k += '_time'
2896                     if d_k not in info and k in s_ks:
2897                         info[d_k] = parse_duration(query[k][0])
2898
2899         # Youtube Music Auto-generated description
2900         if video_description:
2901             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2902             if mobj:
2903                 release_year = mobj.group('release_year')
2904                 release_date = mobj.group('release_date')
2905                 if release_date:
2906                     release_date = release_date.replace('-', '')
2907                     if not release_year:
2908                         release_year = release_date[:4]
2909                 info.update({
2910                     'album': mobj.group('album'.strip()),
2911                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2912                     'track': mobj.group('track').strip(),
2913                     'release_date': release_date,
2914                     'release_year': int_or_none(release_year),
2915                 })
2916
2917         initial_data = None
2918         if webpage:
2919             initial_data = self._extract_yt_initial_variable(
2920                 webpage, self._YT_INITIAL_DATA_RE, video_id,
2921                 'yt initial data')
2922         if not initial_data:
2923             headers = self.generate_api_headers(
2924                 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2925                 session_index=self._extract_session_index(master_ytcfg))
2926
2927             initial_data = self._extract_response(
2928                 item_id=video_id, ep='next', fatal=False,
2929                 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
2930                 note='Downloading initial data API JSON')
2931
2932         try:
2933             # This will error if there is no livechat
2934             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2935             info['subtitles']['live_chat'] = [{
2936                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
2937                 'video_id': video_id,
2938                 'ext': 'json',
2939                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2940             }]
2941         except (KeyError, IndexError, TypeError):
2942             pass
2943
2944         if initial_data:
2945             info['chapters'] = (
2946                 self._extract_chapters_from_json(initial_data, duration)
2947                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2948                 or None)
2949
2950             contents = try_get(
2951                 initial_data,
2952                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2953                 list) or []
2954             for content in contents:
2955                 vpir = content.get('videoPrimaryInfoRenderer')
2956                 if vpir:
2957                     stl = vpir.get('superTitleLink')
2958                     if stl:
2959                         stl = self._get_text(stl)
2960                         if try_get(
2961                                 vpir,
2962                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2963                             info['location'] = stl
2964                         else:
2965                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2966                             if mobj:
2967                                 info.update({
2968                                     'series': mobj.group(1),
2969                                     'season_number': int(mobj.group(2)),
2970                                     'episode_number': int(mobj.group(3)),
2971                                 })
2972                     for tlb in (try_get(
2973                             vpir,
2974                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2975                             list) or []):
2976                         tbr = tlb.get('toggleButtonRenderer') or {}
2977                         for getter, regex in [(
2978                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2979                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2980                                     lambda x: x['accessibility'],
2981                                     lambda x: x['accessibilityData']['accessibilityData'],
2982                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2983                             label = (try_get(tbr, getter, dict) or {}).get('label')
2984                             if label:
2985                                 mobj = re.match(regex, label)
2986                                 if mobj:
2987                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2988                                     break
2989                     sbr_tooltip = try_get(
2990                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2991                     if sbr_tooltip:
2992                         like_count, dislike_count = sbr_tooltip.split(' / ')
2993                         info.update({
2994                             'like_count': str_to_int(like_count),
2995                             'dislike_count': str_to_int(dislike_count),
2996                         })
2997                 vsir = content.get('videoSecondaryInfoRenderer')
2998                 if vsir:
2999                     info['channel'] = self._get_text(try_get(
3000                         vsir,
3001                         lambda x: x['owner']['videoOwnerRenderer']['title'],
3002                         dict))
3003                     rows = try_get(
3004                         vsir,
3005                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3006                         list) or []
3007                     multiple_songs = False
3008                     for row in rows:
3009                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3010                             multiple_songs = True
3011                             break
3012                     for row in rows:
3013                         mrr = row.get('metadataRowRenderer') or {}
3014                         mrr_title = mrr.get('title')
3015                         if not mrr_title:
3016                             continue
3017                         mrr_title = self._get_text(mrr['title'])
3018                         mrr_contents_text = self._get_text(mrr['contents'][0])
3019                         if mrr_title == 'License':
3020                             info['license'] = mrr_contents_text
3021                         elif not multiple_songs:
3022                             if mrr_title == 'Album':
3023                                 info['album'] = mrr_contents_text
3024                             elif mrr_title == 'Artist':
3025                                 info['artist'] = mrr_contents_text
3026                             elif mrr_title == 'Song':
3027                                 info['track'] = mrr_contents_text
3028
3029         fallbacks = {
3030             'channel': 'uploader',
3031             'channel_id': 'uploader_id',
3032             'channel_url': 'uploader_url',
3033         }
3034         for to, frm in fallbacks.items():
3035             if not info.get(to):
3036                 info[to] = info.get(frm)
3037
3038         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3039             v = info.get(s_k)
3040             if v:
3041                 info[d_k] = v
3042
3043         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3044         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3045         is_membersonly = None
3046         is_premium = None
3047         if initial_data and is_private is not None:
3048             is_membersonly = False
3049             is_premium = False
3050             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3051             badge_labels = set()
3052             for content in contents:
3053                 if not isinstance(content, dict):
3054                     continue
3055                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3056             for badge_label in badge_labels:
3057                 if badge_label.lower() == 'members only':
3058                     is_membersonly = True
3059                 elif badge_label.lower() == 'premium':
3060                     is_premium = True
3061                 elif badge_label.lower() == 'unlisted':
3062                     is_unlisted = True
3063
3064         info['availability'] = self._availability(
3065             is_private=is_private,
3066             needs_premium=is_premium,
3067             needs_subscription=is_membersonly,
3068             needs_auth=info['age_limit'] >= 18,
3069             is_unlisted=None if is_private is None else is_unlisted)
3070
3071         # get xsrf for annotations or comments
3072         get_annotations = self.get_param('writeannotations', False)
3073         get_comments = self.get_param('getcomments', False)
3074         if get_annotations or get_comments:
3075             xsrf_token = None
3076             if master_ytcfg:
3077                 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3078             if not xsrf_token:
3079                 xsrf_token = self._search_regex(
3080                     r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3081                     webpage, 'xsrf token', group='xsrf_token', fatal=False)
3082
3083         # annotations
3084         if get_annotations:
3085             invideo_url = get_first(
3086                 player_responses,
3087                 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3088                 expected_type=str)
3089             if xsrf_token and invideo_url:
3090                 xsrf_field_name = None
3091                 if master_ytcfg:
3092                     xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3093                 if not xsrf_field_name:
3094                     xsrf_field_name = self._search_regex(
3095                         r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3096                         webpage, 'xsrf field name',
3097                         group='xsrf_field_name', default='session_token')
3098                 info['annotations'] = self._download_webpage(
3099                     self._proto_relative_url(invideo_url),
3100                     video_id, note='Downloading annotations',
3101                     errnote='Unable to download video annotations', fatal=False,
3102                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3103
3104         if get_comments:
3105             info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3106
3107         self.mark_watched(video_id, player_responses)
3108
3109         return info
3110
3111
3112 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3113     IE_DESC = 'YouTube.com tab'
3114     _VALID_URL = r'''(?x)
3115                     https?://
3116                         (?:\w+\.)?
3117                         (?:
3118                             youtube(?:kids)?\.com|
3119                             invidio\.us
3120                         )/
3121                         (?:
3122                             (?P<channel_type>channel|c|user|browse)/|
3123                             (?P<not_channel>
3124                                 feed/|hashtag/|
3125                                 (?:playlist|watch)\?.*?\blist=
3126                             )|
3127                             (?!(?:%s)\b)  # Direct URLs
3128                         )
3129                         (?P<id>[^/?\#&]+)
3130                     ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3131     IE_NAME = 'youtube:tab'
3132
3133     _TESTS = [{
3134         'note': 'playlists, multipage',
3135         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3136         'playlist_mincount': 94,
3137         'info_dict': {
3138             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3139             'title': 'Игорь Клейнер - Playlists',
3140             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3141             'uploader': 'Игорь Клейнер',
3142             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3143         },
3144     }, {
3145         'note': 'playlists, multipage, different order',
3146         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3147         'playlist_mincount': 94,
3148         'info_dict': {
3149             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3150             'title': 'Игорь Клейнер - Playlists',
3151             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3152             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3153             'uploader': 'Игорь Клейнер',
3154         },
3155     }, {
3156         'note': 'playlists, series',
3157         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3158         'playlist_mincount': 5,
3159         'info_dict': {
3160             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3161             'title': '3Blue1Brown - Playlists',
3162             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3163             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3164             'uploader': '3Blue1Brown',
3165         },
3166     }, {
3167         'note': 'playlists, singlepage',
3168         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3169         'playlist_mincount': 4,
3170         'info_dict': {
3171             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3172             'title': 'ThirstForScience - Playlists',
3173             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3174             'uploader': 'ThirstForScience',
3175             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3176         }
3177     }, {
3178         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3179         'only_matching': True,
3180     }, {
3181         'note': 'basic, single video playlist',
3182         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3183         'info_dict': {
3184             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3185             'uploader': 'Sergey M.',
3186             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3187             'title': 'youtube-dl public playlist',
3188         },
3189         'playlist_count': 1,
3190     }, {
3191         'note': 'empty playlist',
3192         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3193         'info_dict': {
3194             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3195             'uploader': 'Sergey M.',
3196             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3197             'title': 'youtube-dl empty playlist',
3198         },
3199         'playlist_count': 0,
3200     }, {
3201         'note': 'Home tab',
3202         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3203         'info_dict': {
3204             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3205             'title': 'lex will - Home',
3206             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3207             'uploader': 'lex will',
3208             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3209         },
3210         'playlist_mincount': 2,
3211     }, {
3212         'note': 'Videos tab',
3213         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3214         'info_dict': {
3215             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3216             'title': 'lex will - Videos',
3217             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3218             'uploader': 'lex will',
3219             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3220         },
3221         'playlist_mincount': 975,
3222     }, {
3223         'note': 'Videos tab, sorted by popular',
3224         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3225         'info_dict': {
3226             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3227             'title': 'lex will - Videos',
3228             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3229             'uploader': 'lex will',
3230             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3231         },
3232         'playlist_mincount': 199,
3233     }, {
3234         'note': 'Playlists tab',
3235         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3236         'info_dict': {
3237             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3238             'title': 'lex will - Playlists',
3239             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3240             'uploader': 'lex will',
3241             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3242         },
3243         'playlist_mincount': 17,
3244     }, {
3245         'note': 'Community tab',
3246         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3247         'info_dict': {
3248             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3249             'title': 'lex will - Community',
3250             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3251             'uploader': 'lex will',
3252             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3253         },
3254         'playlist_mincount': 18,
3255     }, {
3256         'note': 'Channels tab',
3257         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3258         'info_dict': {
3259             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3260             'title': 'lex will - Channels',
3261             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3262             'uploader': 'lex will',
3263             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3264         },
3265         'playlist_mincount': 12,
3266     }, {
3267         'note': 'Search tab',
3268         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3269         'playlist_mincount': 40,
3270         'info_dict': {
3271             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3272             'title': '3Blue1Brown - Search - linear algebra',
3273             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3274             'uploader': '3Blue1Brown',
3275             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3276         },
3277     }, {
3278         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3279         'only_matching': True,
3280     }, {
3281         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3282         'only_matching': True,
3283     }, {
3284         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3285         'only_matching': True,
3286     }, {
3287         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3288         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3289         'info_dict': {
3290             'title': '29C3: Not my department',
3291             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3292             'uploader': 'Christiaan008',
3293             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3294             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3295         },
3296         'playlist_count': 96,
3297     }, {
3298         'note': 'Large playlist',
3299         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3300         'info_dict': {
3301             'title': 'Uploads from Cauchemar',
3302             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3303             'uploader': 'Cauchemar',
3304             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3305         },
3306         'playlist_mincount': 1123,
3307     }, {
3308         'note': 'even larger playlist, 8832 videos',
3309         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3310         'only_matching': True,
3311     }, {
3312         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3313         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3314         'info_dict': {
3315             'title': 'Uploads from Interstellar Movie',
3316             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3317             'uploader': 'Interstellar Movie',
3318             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3319         },
3320         'playlist_mincount': 21,
3321     }, {
3322         'note': 'Playlist with "show unavailable videos" button',
3323         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3324         'info_dict': {
3325             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3326             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3327             'uploader': 'Phim Siêu Nhân Nhật Bản',
3328             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3329         },
3330         'playlist_mincount': 200,
3331     }, {
3332         'note': 'Playlist with unavailable videos in page 7',
3333         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3334         'info_dict': {
3335             'title': 'Uploads from BlankTV',
3336             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3337             'uploader': 'BlankTV',
3338             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3339         },
3340         'playlist_mincount': 1000,
3341     }, {
3342         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3343         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3344         'info_dict': {
3345             'title': 'Data Analysis with Dr Mike Pound',
3346             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3347             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3348             'uploader': 'Computerphile',
3349             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3350         },
3351         'playlist_mincount': 11,
3352     }, {
3353         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3354         'only_matching': True,
3355     }, {
3356         'note': 'Playlist URL that does not actually serve a playlist',
3357         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3358         'info_dict': {
3359             'id': 'FqZTN594JQw',
3360             'ext': 'webm',
3361             'title': "Smiley's People 01 detective, Adventure Series, Action",
3362             'uploader': 'STREEM',
3363             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3364             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3365             'upload_date': '20150526',
3366             'license': 'Standard YouTube License',
3367             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3368             'categories': ['People & Blogs'],
3369             'tags': list,
3370             'view_count': int,
3371             'like_count': int,
3372             'dislike_count': int,
3373         },
3374         'params': {
3375             'skip_download': True,
3376         },
3377         'skip': 'This video is not available.',
3378         'add_ie': [YoutubeIE.ie_key()],
3379     }, {
3380         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3381         'only_matching': True,
3382     }, {
3383         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3384         'only_matching': True,
3385     }, {
3386         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3387         'info_dict': {
3388             'id': 'FMtPN8yp5LU',  # This will keep changing
3389             'ext': 'mp4',
3390             'title': compat_str,
3391             'uploader': 'Sky News',
3392             'uploader_id': 'skynews',
3393             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3394             'upload_date': r're:\d{8}',
3395             'description': compat_str,
3396             'categories': ['News & Politics'],
3397             'tags': list,
3398             'like_count': int,
3399             'dislike_count': int,
3400         },
3401         'params': {
3402             'skip_download': True,
3403         },
3404         'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3405     }, {
3406         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3407         'info_dict': {
3408             'id': 'a48o2S1cPoo',
3409             'ext': 'mp4',
3410             'title': 'The Young Turks - Live Main Show',
3411             'uploader': 'The Young Turks',
3412             'uploader_id': 'TheYoungTurks',
3413             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3414             'upload_date': '20150715',
3415             'license': 'Standard YouTube License',
3416             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3417             'categories': ['News & Politics'],
3418             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3419             'like_count': int,
3420             'dislike_count': int,
3421         },
3422         'params': {
3423             'skip_download': True,
3424         },
3425         'only_matching': True,
3426     }, {
3427         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3428         'only_matching': True,
3429     }, {
3430         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3431         'only_matching': True,
3432     }, {
3433         'note': 'A channel that is not live. Should raise error',
3434         'url': 'https://www.youtube.com/user/numberphile/live',
3435         'only_matching': True,
3436     }, {
3437         'url': 'https://www.youtube.com/feed/trending',
3438         'only_matching': True,
3439     }, {
3440         'url': 'https://www.youtube.com/feed/library',
3441         'only_matching': True,
3442     }, {
3443         'url': 'https://www.youtube.com/feed/history',
3444         'only_matching': True,
3445     }, {
3446         'url': 'https://www.youtube.com/feed/subscriptions',
3447         'only_matching': True,
3448     }, {
3449         'url': 'https://www.youtube.com/feed/watch_later',
3450         'only_matching': True,
3451     }, {
3452         'note': 'Recommended - redirects to home page',
3453         'url': 'https://www.youtube.com/feed/recommended',
3454         'only_matching': True,
3455     }, {
3456         'note': 'inline playlist with not always working continuations',
3457         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3458         'only_matching': True,
3459     }, {
3460         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3461         'only_matching': True,
3462     }, {
3463         'url': 'https://www.youtube.com/course',
3464         'only_matching': True,
3465     }, {
3466         'url': 'https://www.youtube.com/zsecurity',
3467         'only_matching': True,
3468     }, {
3469         'url': 'http://www.youtube.com/NASAgovVideo/videos',
3470         'only_matching': True,
3471     }, {
3472         'url': 'https://www.youtube.com/TheYoungTurks/live',
3473         'only_matching': True,
3474     }, {
3475         'url': 'https://www.youtube.com/hashtag/cctv9',
3476         'info_dict': {
3477             'id': 'cctv9',
3478             'title': '#cctv9',
3479         },
3480         'playlist_mincount': 350,
3481     }, {
3482         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3483         'only_matching': True,
3484     }, {
3485         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3486         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3487         'only_matching': True
3488     }, {
3489         'note': '/browse/ should redirect to /channel/',
3490         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3491         'only_matching': True
3492     }, {
3493         'note': 'VLPL, should redirect to playlist?list=PL...',
3494         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3495         'info_dict': {
3496             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3497             'uploader': 'NoCopyrightSounds',
3498             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3499             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3500             'title': 'NCS Releases',
3501         },
3502         'playlist_mincount': 166,
3503     }, {
3504         'note': 'Topic, should redirect to playlist?list=UU...',
3505         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3506         'info_dict': {
3507             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3508             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3509             'title': 'Uploads from Royalty Free Music - Topic',
3510             'uploader': 'Royalty Free Music - Topic',
3511         },
3512         'expected_warnings': [
3513             'A channel/user page was given',
3514             'The URL does not have a videos tab',
3515         ],
3516         'playlist_mincount': 101,
3517     }, {
3518         'note': 'Topic without a UU playlist',
3519         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3520         'info_dict': {
3521             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3522             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3523         },
3524         'expected_warnings': [
3525             'A channel/user page was given',
3526             'The URL does not have a videos tab',
3527             'Falling back to channel URL',
3528         ],
3529         'playlist_mincount': 9,
3530     }, {
3531         'note': 'Youtube music Album',
3532         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3533         'info_dict': {
3534             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3535             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3536         },
3537         'playlist_count': 50,
3538     }, {
3539         'note': 'unlisted single video playlist',
3540         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3541         'info_dict': {
3542             'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3543             'uploader': 'colethedj',
3544             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3545             'title': 'yt-dlp unlisted playlist test',
3546             'availability': 'unlisted'
3547         },
3548         'playlist_count': 1,
3549     }]
3550
3551     @classmethod
3552     def suitable(cls, url):
3553         return False if YoutubeIE.suitable(url) else super(
3554             YoutubeTabIE, cls).suitable(url)
3555
3556     def _extract_channel_id(self, webpage):
3557         channel_id = self._html_search_meta(
3558             'channelId', webpage, 'channel id', default=None)
3559         if channel_id:
3560             return channel_id
3561         channel_url = self._html_search_meta(
3562             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3563              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3564              'twitter:app:url:googleplay'), webpage, 'channel url')
3565         return self._search_regex(
3566             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3567             channel_url, 'channel id')
3568
3569     @staticmethod
3570     def _extract_basic_item_renderer(item):
3571         # Modified from _extract_grid_item_renderer
3572         known_basic_renderers = (
3573             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3574         )
3575         for key, renderer in item.items():
3576             if not isinstance(renderer, dict):
3577                 continue
3578             elif key in known_basic_renderers:
3579                 return renderer
3580             elif key.startswith('grid') and key.endswith('Renderer'):
3581                 return renderer
3582
3583     def _grid_entries(self, grid_renderer):
3584         for item in grid_renderer['items']:
3585             if not isinstance(item, dict):
3586                 continue
3587             renderer = self._extract_basic_item_renderer(item)
3588             if not isinstance(renderer, dict):
3589                 continue
3590             title = self._get_text(renderer.get('title'))
3591
3592             # playlist
3593             playlist_id = renderer.get('playlistId')
3594             if playlist_id:
3595                 yield self.url_result(
3596                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3597                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3598                     video_title=title)
3599                 continue
3600             # video
3601             video_id = renderer.get('videoId')
3602             if video_id:
3603                 yield self._extract_video(renderer)
3604                 continue
3605             # channel
3606             channel_id = renderer.get('channelId')
3607             if channel_id:
3608                 yield self.url_result(
3609                     'https://www.youtube.com/channel/%s' % channel_id,
3610                     ie=YoutubeTabIE.ie_key(), video_title=title)
3611                 continue
3612             # generic endpoint URL support
3613             ep_url = urljoin('https://www.youtube.com/', try_get(
3614                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3615                 compat_str))
3616             if ep_url:
3617                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3618                     if ie.suitable(ep_url):
3619                         yield self.url_result(
3620                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3621                         break
3622
3623     def _shelf_entries_from_content(self, shelf_renderer):
3624         content = shelf_renderer.get('content')
3625         if not isinstance(content, dict):
3626             return
3627         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3628         if renderer:
3629             # TODO: add support for nested playlists so each shelf is processed
3630             # as separate playlist
3631             # TODO: this includes only first N items
3632             for entry in self._grid_entries(renderer):
3633                 yield entry
3634         renderer = content.get('horizontalListRenderer')
3635         if renderer:
3636             # TODO
3637             pass
3638
3639     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3640         ep = try_get(
3641             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3642             compat_str)
3643         shelf_url = urljoin('https://www.youtube.com', ep)
3644         if shelf_url:
3645             # Skipping links to another channels, note that checking for
3646             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3647             # will not work
3648             if skip_channels and '/channels?' in shelf_url:
3649                 return
3650             title = self._get_text(shelf_renderer, lambda x: x['title'])
3651             yield self.url_result(shelf_url, video_title=title)
3652         # Shelf may not contain shelf URL, fallback to extraction from content
3653         for entry in self._shelf_entries_from_content(shelf_renderer):
3654             yield entry
3655
3656     def _playlist_entries(self, video_list_renderer):
3657         for content in video_list_renderer['contents']:
3658             if not isinstance(content, dict):
3659                 continue
3660             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3661             if not isinstance(renderer, dict):
3662                 continue
3663             video_id = renderer.get('videoId')
3664             if not video_id:
3665                 continue
3666             yield self._extract_video(renderer)
3667
3668     def _rich_entries(self, rich_grid_renderer):
3669         renderer = try_get(
3670             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3671         video_id = renderer.get('videoId')
3672         if not video_id:
3673             return
3674         yield self._extract_video(renderer)
3675
3676     def _video_entry(self, video_renderer):
3677         video_id = video_renderer.get('videoId')
3678         if video_id:
3679             return self._extract_video(video_renderer)
3680
3681     def _post_thread_entries(self, post_thread_renderer):
3682         post_renderer = try_get(
3683             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3684         if not post_renderer:
3685             return
3686         # video attachment
3687         video_renderer = try_get(
3688             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3689         video_id = video_renderer.get('videoId')
3690         if video_id:
3691             entry = self._extract_video(video_renderer)
3692             if entry:
3693                 yield entry
3694         # playlist attachment
3695         playlist_id = try_get(
3696             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3697         if playlist_id:
3698             yield self.url_result(
3699                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3700                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3701         # inline video links
3702         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3703         for run in runs:
3704             if not isinstance(run, dict):
3705                 continue
3706             ep_url = try_get(
3707                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3708             if not ep_url:
3709                 continue
3710             if not YoutubeIE.suitable(ep_url):
3711                 continue
3712             ep_video_id = YoutubeIE._match_id(ep_url)
3713             if video_id == ep_video_id:
3714                 continue
3715             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3716
3717     def _post_thread_continuation_entries(self, post_thread_continuation):
3718         contents = post_thread_continuation.get('contents')
3719         if not isinstance(contents, list):
3720             return
3721         for content in contents:
3722             renderer = content.get('backstagePostThreadRenderer')
3723             if not isinstance(renderer, dict):
3724                 continue
3725             for entry in self._post_thread_entries(renderer):
3726                 yield entry
3727
3728     r''' # unused
3729     def _rich_grid_entries(self, contents):
3730         for content in contents:
3731             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3732             if video_renderer:
3733                 entry = self._video_entry(video_renderer)
3734                 if entry:
3735                     yield entry
3736     '''
3737     def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3738
3739         def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
3740             contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3741             for content in contents:
3742                 if not isinstance(content, dict):
3743                     continue
3744                 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3745                 if not is_renderer:
3746                     renderer = content.get('richItemRenderer')
3747                     if renderer:
3748                         for entry in self._rich_entries(renderer):
3749                             yield entry
3750                         continuation_list[0] = self._extract_continuation(parent_renderer)
3751                     continue
3752                 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3753                 for isr_content in isr_contents:
3754                     if not isinstance(isr_content, dict):
3755                         continue
3756
3757                     known_renderers = {
3758                         'playlistVideoListRenderer': self._playlist_entries,
3759                         'gridRenderer': self._grid_entries,
3760                         'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3761                         'backstagePostThreadRenderer': self._post_thread_entries,
3762                         'videoRenderer': lambda x: [self._video_entry(x)],
3763                     }
3764                     for key, renderer in isr_content.items():
3765                         if key not in known_renderers:
3766                             continue
3767                         for entry in known_renderers[key](renderer):
3768                             if entry:
3769                                 yield entry
3770                         continuation_list[0] = self._extract_continuation(renderer)
3771                         break
3772
3773                 if not continuation_list[0]:
3774                     continuation_list[0] = self._extract_continuation(is_renderer)
3775
3776             if not continuation_list[0]:
3777                 continuation_list[0] = self._extract_continuation(parent_renderer)
3778
3779         continuation_list = [None]  # Python 2 doesnot support nonlocal
3780         tab_content = try_get(tab, lambda x: x['content'], dict)
3781         if not tab_content:
3782             return
3783         parent_renderer = (
3784             try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3785             or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3786         for entry in extract_entries(parent_renderer):
3787             yield entry
3788         continuation = continuation_list[0]
3789         visitor_data = None
3790
3791         for page_num in itertools.count(1):
3792             if not continuation:
3793                 break
3794             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
3795             response = self._extract_response(
3796                 item_id='%s page %s' % (item_id, page_num),
3797                 query=continuation, headers=headers, ytcfg=ytcfg,
3798                 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3799
3800             if not response:
3801                 break
3802             visitor_data = try_get(
3803                 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
3804
3805             known_continuation_renderers = {
3806                 'playlistVideoListContinuation': self._playlist_entries,
3807                 'gridContinuation': self._grid_entries,
3808                 'itemSectionContinuation': self._post_thread_continuation_entries,
3809                 'sectionListContinuation': extract_entries,  # for feeds
3810             }
3811             continuation_contents = try_get(
3812                 response, lambda x: x['continuationContents'], dict) or {}
3813             continuation_renderer = None
3814             for key, value in continuation_contents.items():
3815                 if key not in known_continuation_renderers:
3816                     continue
3817                 continuation_renderer = value
3818                 continuation_list = [None]
3819                 for entry in known_continuation_renderers[key](continuation_renderer):
3820                     yield entry
3821                 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3822                 break
3823             if continuation_renderer:
3824                 continue
3825
3826             known_renderers = {
3827                 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3828                 'gridVideoRenderer': (self._grid_entries, 'items'),
3829                 'gridChannelRenderer': (self._grid_entries, 'items'),
3830                 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3831                 'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
3832                 'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
3833                 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3834             }
3835             on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3836             continuation_items = try_get(
3837                 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3838             continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3839             video_items_renderer = None
3840             for key, value in continuation_item.items():
3841                 if key not in known_renderers:
3842                     continue
3843                 video_items_renderer = {known_renderers[key][1]: continuation_items}
3844                 continuation_list = [None]
3845                 for entry in known_renderers[key][0](video_items_renderer):
3846                     yield entry
3847                 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3848                 break
3849             if video_items_renderer:
3850                 continue
3851             break
3852
3853     @staticmethod
3854     def _extract_selected_tab(tabs):
3855         for tab in tabs:
3856             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3857             if renderer.get('selected') is True:
3858                 return renderer
3859         else:
3860             raise ExtractorError('Unable to find selected tab')
3861
3862     @classmethod
3863     def _extract_uploader(cls, data):
3864         uploader = {}
3865         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3866         owner = try_get(
3867             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3868         if owner:
3869             uploader['uploader'] = owner.get('text')
3870             uploader['uploader_id'] = try_get(
3871                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3872             uploader['uploader_url'] = urljoin(
3873                 'https://www.youtube.com/',
3874                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3875         return {k: v for k, v in uploader.items() if v is not None}
3876
3877     def _extract_from_tabs(self, item_id, webpage, data, tabs):
3878         playlist_id = title = description = channel_url = channel_name = channel_id = None
3879         thumbnails_list = tags = []
3880
3881         selected_tab = self._extract_selected_tab(tabs)
3882         renderer = try_get(
3883             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3884         if renderer:
3885             channel_name = renderer.get('title')
3886             channel_url = renderer.get('channelUrl')
3887             channel_id = renderer.get('externalId')
3888         else:
3889             renderer = try_get(
3890                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3891
3892         if renderer:
3893             title = renderer.get('title')
3894             description = renderer.get('description', '')
3895             playlist_id = channel_id
3896             tags = renderer.get('keywords', '').split()
3897             thumbnails_list = (
3898                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3899                 or try_get(
3900                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3901                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3902                     list)
3903                 or [])
3904
3905         thumbnails = []
3906         for t in thumbnails_list:
3907             if not isinstance(t, dict):
3908                 continue
3909             thumbnail_url = url_or_none(t.get('url'))
3910             if not thumbnail_url:
3911                 continue
3912             thumbnails.append({
3913                 'url': thumbnail_url,
3914                 'width': int_or_none(t.get('width')),
3915                 'height': int_or_none(t.get('height')),
3916             })
3917         if playlist_id is None:
3918             playlist_id = item_id
3919         if title is None:
3920             title = (
3921                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3922                 or playlist_id)
3923         title += format_field(selected_tab, 'title', ' - %s')
3924         title += format_field(selected_tab, 'expandedText', ' - %s')
3925         metadata = {
3926             'playlist_id': playlist_id,
3927             'playlist_title': title,
3928             'playlist_description': description,
3929             'uploader': channel_name,
3930             'uploader_id': channel_id,
3931             'uploader_url': channel_url,
3932             'thumbnails': thumbnails,
3933             'tags': tags,
3934         }
3935         availability = self._extract_availability(data)
3936         if availability:
3937             metadata['availability'] = availability
3938         if not channel_id:
3939             metadata.update(self._extract_uploader(data))
3940         metadata.update({
3941             'channel': metadata['uploader'],
3942             'channel_id': metadata['uploader_id'],
3943             'channel_url': metadata['uploader_url']})
3944         ytcfg = self.extract_ytcfg(item_id, webpage)
3945         return self.playlist_result(
3946             self._entries(
3947                 selected_tab, playlist_id,
3948                 self._extract_identity_token(webpage, item_id),
3949                 self._extract_account_syncid(ytcfg, data), ytcfg),
3950             **metadata)
3951
3952     def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
3953         first_id = last_id = None
3954         ytcfg = self.extract_ytcfg(playlist_id, webpage)
3955         headers = self.generate_api_headers(
3956             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3957             identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
3958         for page_num in itertools.count(1):
3959             videos = list(self._playlist_entries(playlist))
3960             if not videos:
3961                 return
3962             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3963             if start >= len(videos):
3964                 return
3965             for video in videos[start:]:
3966                 if video['id'] == first_id:
3967                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3968                     return
3969                 yield video
3970             first_id = first_id or videos[0]['id']
3971             last_id = videos[-1]['id']
3972             watch_endpoint = try_get(
3973                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3974             query = {
3975                 'playlistId': playlist_id,
3976                 'videoId': watch_endpoint.get('videoId') or last_id,
3977                 'index': watch_endpoint.get('index') or len(videos),
3978                 'params': watch_endpoint.get('params') or 'OAE%3D'
3979             }
3980             response = self._extract_response(
3981                 item_id='%s page %d' % (playlist_id, page_num),
3982                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3983                 check_get_keys='contents'
3984             )
3985             playlist = try_get(
3986                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3987
3988     def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3989         title = playlist.get('title') or try_get(
3990             data, lambda x: x['titleText']['simpleText'], compat_str)
3991         playlist_id = playlist.get('playlistId') or item_id
3992
3993         # Delegating everything except mix playlists to regular tab-based playlist URL
3994         playlist_url = urljoin(url, try_get(
3995             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3996             compat_str))
3997         if playlist_url and playlist_url != url:
3998             return self.url_result(
3999                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4000                 video_title=title)
4001
4002         return self.playlist_result(
4003             self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4004             playlist_id=playlist_id, playlist_title=title)
4005
4006     def _extract_availability(self, data):
4007         """
4008         Gets the availability of a given playlist/tab.
4009         Note: Unless YouTube tells us explicitly, we do not assume it is public
4010         @param data: response
4011         """
4012         is_private = is_unlisted = None
4013         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4014         badge_labels = self._extract_badges(renderer)
4015
4016         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4017         privacy_dropdown_entries = try_get(
4018             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4019         for renderer_dict in privacy_dropdown_entries:
4020             is_selected = try_get(
4021                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4022             if not is_selected:
4023                 continue
4024             label = self._get_text(
4025                 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
4026             if label:
4027                 badge_labels.add(label.lower())
4028                 break
4029
4030         for badge_label in badge_labels:
4031             if badge_label == 'unlisted':
4032                 is_unlisted = True
4033             elif badge_label == 'private':
4034                 is_private = True
4035             elif badge_label == 'public':
4036                 is_unlisted = is_private = False
4037         return self._availability(is_private, False, False, False, is_unlisted)
4038
4039     @staticmethod
4040     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4041         sidebar_renderer = try_get(
4042             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4043         for item in sidebar_renderer:
4044             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4045             if renderer:
4046                 return renderer
4047
4048     def _reload_with_unavailable_videos(self, item_id, data, webpage):
4049         """
4050         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4051         """
4052         browse_id = params = None
4053         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4054         if not renderer:
4055             return
4056         menu_renderer = try_get(
4057             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4058         for menu_item in menu_renderer:
4059             if not isinstance(menu_item, dict):
4060                 continue
4061             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4062             text = try_get(
4063                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4064             if not text or text.lower() != 'show unavailable videos':
4065                 continue
4066             browse_endpoint = try_get(
4067                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4068             browse_id = browse_endpoint.get('browseId')
4069             params = browse_endpoint.get('params')
4070             break
4071
4072         ytcfg = self.extract_ytcfg(item_id, webpage)
4073         headers = self.generate_api_headers(
4074             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4075             identity_token=self._extract_identity_token(webpage, item_id=item_id),
4076             visitor_data=try_get(
4077                 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4078         query = {
4079             'params': params or 'wgYCCAA=',
4080             'browseId': browse_id or 'VL%s' % item_id
4081         }
4082         return self._extract_response(
4083             item_id=item_id, headers=headers, query=query,
4084             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4085             note='Downloading API JSON with unavailable videos')
4086
4087     def _extract_webpage(self, url, item_id):
4088         retries = self.get_param('extractor_retries', 3)
4089         count = -1
4090         last_error = 'Incomplete yt initial data recieved'
4091         while count < retries:
4092             count += 1
4093             # Sometimes youtube returns a webpage with incomplete ytInitialData
4094             # See: https://github.com/yt-dlp/yt-dlp/issues/116
4095             if count:
4096                 self.report_warning('%s. Retrying ...' % last_error)
4097             webpage = self._download_webpage(
4098                 url, item_id,
4099                 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4100             data = self.extract_yt_initial_data(item_id, webpage)
4101             if data.get('contents') or data.get('currentVideoEndpoint'):
4102                 break
4103             # Extract alerts here only when there is error
4104             self._extract_and_report_alerts(data)
4105             if count >= retries:
4106                 raise ExtractorError(last_error)
4107         return webpage, data
4108
4109     @staticmethod
4110     def _smuggle_data(entries, data):
4111         for entry in entries:
4112             if data:
4113                 entry['url'] = smuggle_url(entry['url'], data)
4114             yield entry
4115
4116     def _real_extract(self, url):
4117         url, smuggled_data = unsmuggle_url(url, {})
4118         if self.is_music_url(url):
4119             smuggled_data['is_music_url'] = True
4120         info_dict = self.__real_extract(url, smuggled_data)
4121         if info_dict.get('entries'):
4122             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4123         return info_dict
4124
4125     _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4126
4127     def __real_extract(self, url, smuggled_data):
4128         item_id = self._match_id(url)
4129         url = compat_urlparse.urlunparse(
4130             compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4131         compat_opts = self.get_param('compat_opts', [])
4132
4133         def get_mobj(url):
4134             mobj = self._url_re.match(url).groupdict()
4135             mobj.update((k, '') for k, v in mobj.items() if v is None)
4136             return mobj
4137
4138         mobj = get_mobj(url)
4139         # Youtube returns incomplete data if tabname is not lower case
4140         pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4141
4142         if is_channel:
4143             if smuggled_data.get('is_music_url'):
4144                 if item_id[:2] == 'VL':
4145                     # Youtube music VL channels have an equivalent playlist
4146                     item_id = item_id[2:]
4147                     pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4148                 elif item_id[:2] == 'MP':
4149                     # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
4150                     item_id = self._search_regex(
4151                         r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
4152                         self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
4153                         'playlist id')
4154                     pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4155                 elif mobj['channel_type'] == 'browse':
4156                     # Youtube music /browse/ should be changed to /channel/
4157                     pre = 'https://www.youtube.com/channel/%s' % item_id
4158         if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4159             # Home URLs should redirect to /videos/
4160             self.report_warning(
4161                 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4162                 'To download only the videos in the home page, add a "/featured" to the URL')
4163             tab = '/videos'
4164
4165         url = ''.join((pre, tab, post))
4166         mobj = get_mobj(url)
4167
4168         # Handle both video/playlist URLs
4169         qs = parse_qs(url)
4170         video_id = qs.get('v', [None])[0]
4171         playlist_id = qs.get('list', [None])[0]
4172
4173         if not video_id and mobj['not_channel'].startswith('watch'):
4174             if not playlist_id:
4175                 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
4176                 raise ExtractorError('Unable to recognize tab page')
4177             # Common mistake: https://www.youtube.com/watch?list=playlist_id
4178             self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4179             url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4180             mobj = get_mobj(url)
4181
4182         if video_id and playlist_id:
4183             if self.get_param('noplaylist'):
4184                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4185                 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4186             self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4187
4188         webpage, data = self._extract_webpage(url, item_id)
4189
4190         tabs = try_get(
4191             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4192         if tabs:
4193             selected_tab = self._extract_selected_tab(tabs)
4194             tab_name = selected_tab.get('title', '')
4195             if 'no-youtube-channel-redirect' not in compat_opts:
4196                 if mobj['tab'] == '/live':
4197                     # Live tab should have redirected to the video
4198                     raise ExtractorError('The channel is not currently live', expected=True)
4199                 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4200                     if not mobj['not_channel'] and item_id[:2] == 'UC':
4201                         # Topic channels don't have /videos. Use the equivalent playlist instead
4202                         self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4203                         pl_id = 'UU%s' % item_id[2:]
4204                         pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4205                         try:
4206                             pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
4207                             for alert_type, alert_message in self._extract_alerts(pl_data):
4208                                 if alert_type == 'error':
4209                                     raise ExtractorError('Youtube said: %s' % alert_message)
4210                             item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
4211                         except ExtractorError:
4212                             self.report_warning('The playlist gave error. Falling back to channel URL')
4213                     else:
4214                         self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4215
4216         self.write_debug('Final URL: %s' % url)
4217
4218         # YouTube sometimes provides a button to reload playlist with unavailable videos.
4219         if 'no-youtube-unavailable-videos' not in compat_opts:
4220             data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
4221         self._extract_and_report_alerts(data)
4222         tabs = try_get(
4223             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4224         if tabs:
4225             return self._extract_from_tabs(item_id, webpage, data, tabs)
4226
4227         playlist = try_get(
4228             data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4229         if playlist:
4230             return self._extract_from_playlist(item_id, url, data, playlist, webpage)
4231
4232         video_id = try_get(
4233             data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4234             compat_str) or video_id
4235         if video_id:
4236             if mobj['tab'] != '/live':  # live tab is expected to redirect to video
4237                 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4238             return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4239
4240         raise ExtractorError('Unable to recognize tab page')
4241
4242
4243 class YoutubePlaylistIE(InfoExtractor):
4244     IE_DESC = 'YouTube.com playlists'
4245     _VALID_URL = r'''(?x)(?:
4246                         (?:https?://)?
4247                         (?:\w+\.)?
4248                         (?:
4249                             (?:
4250                                 youtube(?:kids)?\.com|
4251                                 invidio\.us
4252                             )
4253                             /.*?\?.*?\blist=
4254                         )?
4255                         (?P<id>%(playlist_id)s)
4256                      )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4257     IE_NAME = 'youtube:playlist'
4258     _TESTS = [{
4259         'note': 'issue #673',
4260         'url': 'PLBB231211A4F62143',
4261         'info_dict': {
4262             'title': '[OLD]Team Fortress 2 (Class-based LP)',
4263             'id': 'PLBB231211A4F62143',
4264             'uploader': 'Wickydoo',
4265             'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4266             'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4267         },
4268         'playlist_mincount': 29,
4269     }, {
4270         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4271         'info_dict': {
4272             'title': 'YDL_safe_search',
4273             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4274         },
4275         'playlist_count': 2,
4276         'skip': 'This playlist is private',
4277     }, {
4278         'note': 'embedded',
4279         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4280         'playlist_count': 4,
4281         'info_dict': {
4282             'title': 'JODA15',
4283             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4284             'uploader': 'milan',
4285             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4286         }
4287     }, {
4288         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4289         'playlist_mincount': 654,
4290         'info_dict': {
4291             'title': '2018 Chinese New Singles (11/6 updated)',
4292             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4293             'uploader': 'LBK',
4294             'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4295             'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4296         }
4297     }, {
4298         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4299         'only_matching': True,
4300     }, {
4301         # music album playlist
4302         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4303         'only_matching': True,
4304     }]
4305
4306     @classmethod
4307     def suitable(cls, url):
4308         if YoutubeTabIE.suitable(url):
4309             return False
4310         # Hack for lazy extractors until more generic solution is implemented
4311         # (see #28780)
4312         from .youtube import parse_qs
4313         qs = parse_qs(url)
4314         if qs.get('v', [None])[0]:
4315             return False
4316         return super(YoutubePlaylistIE, cls).suitable(url)
4317
4318     def _real_extract(self, url):
4319         playlist_id = self._match_id(url)
4320         is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4321         url = update_url_query(
4322             'https://www.youtube.com/playlist',
4323             parse_qs(url) or {'list': playlist_id})
4324         if is_music_url:
4325             url = smuggle_url(url, {'is_music_url': True})
4326         return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4327
4328
4329 class YoutubeYtBeIE(InfoExtractor):
4330     IE_DESC = 'youtu.be'
4331     _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4332     _TESTS = [{
4333         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4334         'info_dict': {
4335             'id': 'yeWKywCrFtk',
4336             'ext': 'mp4',
4337             'title': 'Small Scale Baler and Braiding Rugs',
4338             'uploader': 'Backus-Page House Museum',
4339             'uploader_id': 'backuspagemuseum',
4340             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4341             'upload_date': '20161008',
4342             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4343             'categories': ['Nonprofits & Activism'],
4344             'tags': list,
4345             'like_count': int,
4346             'dislike_count': int,
4347         },
4348         'params': {
4349             'noplaylist': True,
4350             'skip_download': True,
4351         },
4352     }, {
4353         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4354         'only_matching': True,
4355     }]
4356
4357     def _real_extract(self, url):
4358         mobj = re.match(self._VALID_URL, url)
4359         video_id = mobj.group('id')
4360         playlist_id = mobj.group('playlist_id')
4361         return self.url_result(
4362             update_url_query('https://www.youtube.com/watch', {
4363                 'v': video_id,
4364                 'list': playlist_id,
4365                 'feature': 'youtu.be',
4366             }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4367
4368
4369 class YoutubeYtUserIE(InfoExtractor):
4370     IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
4371     _VALID_URL = r'ytuser:(?P<id>.+)'
4372     _TESTS = [{
4373         'url': 'ytuser:phihag',
4374         'only_matching': True,
4375     }]
4376
4377     def _real_extract(self, url):
4378         user_id = self._match_id(url)
4379         return self.url_result(
4380             'https://www.youtube.com/user/%s' % user_id,
4381             ie=YoutubeTabIE.ie_key(), video_id=user_id)
4382
4383
4384 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4385     IE_NAME = 'youtube:favorites'
4386     IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
4387     _VALID_URL = r':ytfav(?:ou?rite)?s?'
4388     _LOGIN_REQUIRED = True
4389     _TESTS = [{
4390         'url': ':ytfav',
4391         'only_matching': True,
4392     }, {
4393         'url': ':ytfavorites',
4394         'only_matching': True,
4395     }]
4396
4397     def _real_extract(self, url):
4398         return self.url_result(
4399             'https://www.youtube.com/playlist?list=LL',
4400             ie=YoutubeTabIE.ie_key())
4401
4402
4403 class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
4404     IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
4405     # there doesn't appear to be a real limit, for example if you search for
4406     # 'python' you get more than 8.000.000 results
4407     _MAX_RESULTS = float('inf')
4408     IE_NAME = 'youtube:search'
4409     _SEARCH_KEY = 'ytsearch'
4410     _SEARCH_PARAMS = None
4411     _TESTS = []
4412
4413     def _entries(self, query, n):
4414         data = {'query': query}
4415         if self._SEARCH_PARAMS:
4416             data['params'] = self._SEARCH_PARAMS
4417         total = 0
4418         continuation = {}
4419         for page_num in itertools.count(1):
4420             data.update(continuation)
4421             search = self._extract_response(
4422                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4423                 check_get_keys=('contents', 'onResponseReceivedCommands')
4424             )
4425             if not search:
4426                 break
4427             slr_contents = try_get(
4428                 search,
4429                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4430                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4431                 list)
4432             if not slr_contents:
4433                 break
4434
4435             # Youtube sometimes adds promoted content to searches,
4436             # changing the index location of videos and token.
4437             # So we search through all entries till we find them.
4438             continuation = None
4439             for slr_content in slr_contents:
4440                 if not continuation:
4441                     continuation = self._extract_continuation({'contents': [slr_content]})
4442
4443                 isr_contents = try_get(
4444                     slr_content,
4445                     lambda x: x['itemSectionRenderer']['contents'],
4446                     list)
4447                 if not isr_contents:
4448                     continue
4449                 for content in isr_contents:
4450                     if not isinstance(content, dict):
4451                         continue
4452                     video = content.get('videoRenderer')
4453                     if not isinstance(video, dict):
4454                         continue
4455                     video_id = video.get('videoId')
4456                     if not video_id:
4457                         continue
4458
4459                     yield self._extract_video(video)
4460                     total += 1
4461                     if total == n:
4462                         return
4463
4464             if not continuation:
4465                 break
4466
4467     def _get_n_results(self, query, n):
4468         """Get a specified number of results for a query"""
4469         return self.playlist_result(self._entries(query, n), query, query)
4470
4471
4472 class YoutubeSearchDateIE(YoutubeSearchIE):
4473     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4474     _SEARCH_KEY = 'ytsearchdate'
4475     IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
4476     _SEARCH_PARAMS = 'CAI%3D'
4477
4478
4479 class YoutubeSearchURLIE(YoutubeSearchIE):
4480     IE_DESC = 'YouTube.com search URLs'
4481     IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4482     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4483     # _MAX_RESULTS = 100
4484     _TESTS = [{
4485         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4486         'playlist_mincount': 5,
4487         'info_dict': {
4488             'id': 'youtube-dl test video',
4489             'title': 'youtube-dl test video',
4490         }
4491     }, {
4492         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4493         'only_matching': True,
4494     }]
4495
4496     @classmethod
4497     def _make_valid_url(cls):
4498         return cls._VALID_URL
4499
4500     def _real_extract(self, url):
4501         qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4502         query = (qs.get('search_query') or qs.get('q'))[0]
4503         self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
4504         return self._get_n_results(query, self._MAX_RESULTS)
4505
4506
4507 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4508     """
4509     Base class for feed extractors
4510     Subclasses must define the _FEED_NAME property.
4511     """
4512     _LOGIN_REQUIRED = True
4513     _TESTS = []
4514
4515     @property
4516     def IE_NAME(self):
4517         return 'youtube:%s' % self._FEED_NAME
4518
4519     def _real_extract(self, url):
4520         return self.url_result(
4521             'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4522             ie=YoutubeTabIE.ie_key())
4523
4524
4525 class YoutubeWatchLaterIE(InfoExtractor):
4526     IE_NAME = 'youtube:watchlater'
4527     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
4528     _VALID_URL = r':ytwatchlater'
4529     _TESTS = [{
4530         'url': ':ytwatchlater',
4531         'only_matching': True,
4532     }]
4533
4534     def _real_extract(self, url):
4535         return self.url_result(
4536             'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4537
4538
4539 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4540     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
4541     _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4542     _FEED_NAME = 'recommended'
4543     _LOGIN_REQUIRED = False
4544     _TESTS = [{
4545         'url': ':ytrec',
4546         'only_matching': True,
4547     }, {
4548         'url': ':ytrecommended',
4549         'only_matching': True,
4550     }, {
4551         'url': 'https://youtube.com',
4552         'only_matching': True,
4553     }]
4554
4555
4556 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4557     IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
4558     _VALID_URL = r':ytsub(?:scription)?s?'
4559     _FEED_NAME = 'subscriptions'
4560     _TESTS = [{
4561         'url': ':ytsubs',
4562         'only_matching': True,
4563     }, {
4564         'url': ':ytsubscriptions',
4565         'only_matching': True,
4566     }]
4567
4568
4569 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4570     IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
4571     _VALID_URL = r':ythis(?:tory)?'
4572     _FEED_NAME = 'history'
4573     _TESTS = [{
4574         'url': ':ythistory',
4575         'only_matching': True,
4576     }]
4577
4578
4579 class YoutubeTruncatedURLIE(InfoExtractor):
4580     IE_NAME = 'youtube:truncated_url'
4581     IE_DESC = False  # Do not list
4582     _VALID_URL = r'''(?x)
4583         (?:https?://)?
4584         (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4585         (?:watch\?(?:
4586             feature=[a-z_]+|
4587             annotation_id=annotation_[^&]+|
4588             x-yt-cl=[0-9]+|
4589             hl=[^&]*|
4590             t=[0-9]+
4591         )?
4592         |
4593             attribution_link\?a=[^&]+
4594         )
4595         $
4596     '''
4597
4598     _TESTS = [{
4599         'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4600         'only_matching': True,
4601     }, {
4602         'url': 'https://www.youtube.com/watch?',
4603         'only_matching': True,
4604     }, {
4605         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4606         'only_matching': True,
4607     }, {
4608         'url': 'https://www.youtube.com/watch?feature=foo',
4609         'only_matching': True,
4610     }, {
4611         'url': 'https://www.youtube.com/watch?hl=en-GB',
4612         'only_matching': True,
4613     }, {
4614         'url': 'https://www.youtube.com/watch?t=2372',
4615         'only_matching': True,
4616     }]
4617
4618     def _real_extract(self, url):
4619         raise ExtractorError(
4620             'Did you forget to quote the URL? Remember that & is a meta '
4621             'character in most shells, so you want to put the URL in quotes, '
4622             'like  youtube-dl '
4623             '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4624             ' or simply  youtube-dl BaW_jenozKc  .',
4625             expected=True)
4626
4627
4628 class YoutubeTruncatedIDIE(InfoExtractor):
4629     IE_NAME = 'youtube:truncated_id'
4630     IE_DESC = False  # Do not list
4631     _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4632
4633     _TESTS = [{
4634         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4635         'only_matching': True,
4636     }]
4637
4638     def _real_extract(self, url):
4639         video_id = self._match_id(url)
4640         raise ExtractorError(
4641             'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4642             expected=True)