youtube_dlc/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     bool_or_none,
  31     clean_html,
  32     error_to_compat_str,
  33     extract_attributes,
  34     ExtractorError,
  35     float_or_none,
  36     get_element_by_attribute,
  37     get_element_by_id,
  38     int_or_none,
  39     js_to_json,
  40     mimetype2ext,
  41     orderedSet,
  42     parse_codecs,
  43     parse_count,
  44     parse_duration,
  45     remove_quotes,
  46     remove_start,
  47     smuggle_url,
  48     str_or_none,
  49     str_to_int,
  50     try_get,
  51     unescapeHTML,
  52     unified_strdate,
  53     unsmuggle_url,
  54     uppercase_escape,
  55     url_or_none,
  56     urlencode_postdata,
  57 )
  58
  59
  60 class YoutubeBaseInfoExtractor(InfoExtractor):
  61     """Provide base functions for Youtube extractors"""
  62     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  63     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  64
  65     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  66     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  67     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  68
  69     _NETRC_MACHINE = 'youtube'
  70     # If True it will raise an error if no login info is provided
  71     _LOGIN_REQUIRED = False
  72
  73     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  74     _INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
  75     _YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"
  76
  77     _YOUTUBE_CLIENT_HEADERS = {
  78         'x-youtube-client-name': '1',
  79         'x-youtube-client-version': '1.20200609.04.02',
  80     }
  81
  82     def _set_language(self):
  83         self._set_cookie(
  84             '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
  85             # YouTube sets the expire time to about two months
  86             expire_time=time.time() + 2 * 30 * 24 * 3600)
  87
  88     def _ids_to_results(self, ids):
  89         return [
  90             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  91             for vid_id in ids]
  92
  93     def _login(self):
  94         """
  95         Attempt to log in to YouTube.
  96         True is returned if successful or skipped.
  97         False is returned if login failed.
  98
  99         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 100         """
 101         username, password = self._get_login_info()
 102         # No authentication to be performed
 103         if username is None:
 104             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
 105                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 106             if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 107                 self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 108             return True
 109
 110         login_page = self._download_webpage(
 111             self._LOGIN_URL, None,
 112             note='Downloading login page',
 113             errnote='unable to fetch login page', fatal=False)
 114         if login_page is False:
 115             return
 116
 117         login_form = self._hidden_inputs(login_page)
 118
 119         def req(url, f_req, note, errnote):
 120             data = login_form.copy()
 121             data.update({
 122                 'pstMsg': 1,
 123                 'checkConnection': 'youtube',
 124                 'checkedDomains': 'youtube',
 125                 'hl': 'en',
 126                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 127                 'f.req': json.dumps(f_req),
 128                 'flowName': 'GlifWebSignIn',
 129                 'flowEntry': 'ServiceLogin',
 130                 # TODO: reverse actual botguard identifier generation algo
 131                 'bgRequest': '["identifier",""]',
 132             })
 133             return self._download_json(
 134                 url, None, note=note, errnote=errnote,
 135                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 136                 fatal=False,
 137                 data=urlencode_postdata(data), headers={
 138                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 139                     'Google-Accounts-XSRF': 1,
 140                 })
 141
 142         def warn(message):
 143             self._downloader.report_warning(message)
 144
 145         lookup_req = [
 146             username,
 147             None, [], None, 'US', None, None, 2, False, True,
 148             [
 149                 None, None,
 150                 [2, 1, None, 1,
 151                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 152                  None, [], 4],
 153                 1, [None, None, []], None, None, None, True
 154             ],
 155             username,
 156         ]
 157
 158         lookup_results = req(
 159             self._LOOKUP_URL, lookup_req,
 160             'Looking up account info', 'Unable to look up account info')
 161
 162         if lookup_results is False:
 163             return False
 164
 165         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 166         if not user_hash:
 167             warn('Unable to extract user hash')
 168             return False
 169
 170         challenge_req = [
 171             user_hash,
 172             None, 1, None, [1, None, None, None, [password, None, True]],
 173             [
 174                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 175                 1, [None, None, []], None, None, None, True
 176             ]]
 177
 178         challenge_results = req(
 179             self._CHALLENGE_URL, challenge_req,
 180             'Logging in', 'Unable to log in')
 181
 182         if challenge_results is False:
 183             return
 184
 185         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 186         if login_res:
 187             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 188             warn(
 189                 'Unable to login: %s' % 'Invalid password'
 190                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 191             return False
 192
 193         res = try_get(challenge_results, lambda x: x[0][-1], list)
 194         if not res:
 195             warn('Unable to extract result entry')
 196             return False
 197
 198         login_challenge = try_get(res, lambda x: x[0][0], list)
 199         if login_challenge:
 200             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 201             if challenge_str == 'TWO_STEP_VERIFICATION':
 202                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 203                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 204                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 205                 if status == 'QUOTA_EXCEEDED':
 206                     warn('Exceeded the limit of TFA codes, try later')
 207                     return False
 208
 209                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 210                 if not tl:
 211                     warn('Unable to extract TL')
 212                     return False
 213
 214                 tfa_code = self._get_tfa_info('2-step verification code')
 215
 216                 if not tfa_code:
 217                     warn(
 218                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 219                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 220                     return False
 221
 222                 tfa_code = remove_start(tfa_code, 'G-')
 223
 224                 tfa_req = [
 225                     user_hash, None, 2, None,
 226                     [
 227                         9, None, None, None, None, None, None, None,
 228                         [None, tfa_code, True, 2]
 229                     ]]
 230
 231                 tfa_results = req(
 232                     self._TFA_URL.format(tl), tfa_req,
 233                     'Submitting TFA code', 'Unable to submit TFA code')
 234
 235                 if tfa_results is False:
 236                     return False
 237
 238                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 239                 if tfa_res:
 240                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 241                     warn(
 242                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 243                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 244                     return False
 245
 246                 check_cookie_url = try_get(
 247                     tfa_results, lambda x: x[0][-1][2], compat_str)
 248             else:
 249                 CHALLENGES = {
 250                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 251                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 252                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 253                 }
 254                 challenge = CHALLENGES.get(
 255                     challenge_str,
 256                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 257                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 258                 return False
 259         else:
 260             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 261
 262         if not check_cookie_url:
 263             warn('Unable to extract CheckCookie URL')
 264             return False
 265
 266         check_cookie_results = self._download_webpage(
 267             check_cookie_url, None, 'Checking cookie', fatal=False)
 268
 269         if check_cookie_results is False:
 270             return False
 271
 272         if 'https://myaccount.google.com/' not in check_cookie_results:
 273             warn('Unable to log in')
 274             return False
 275
 276         return True
 277
 278     def _download_webpage_handle(self, *args, **kwargs):
 279         query = kwargs.get('query', {}).copy()
 280         kwargs['query'] = query
 281         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 282             *args, **compat_kwargs(kwargs))
 283
 284     def _get_yt_initial_data(self, video_id, webpage):
 285         config = self._search_regex(
 286             (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
 287              r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
 288             webpage, 'ytInitialData', default=None)
 289         if config:
 290             return self._parse_json(
 291                 uppercase_escape(config), video_id, fatal=False)
 292
 293     def _real_initialize(self):
 294         if self._downloader is None:
 295             return
 296         self._set_language()
 297         if not self._login():
 298             return
 299
 300
 301 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 302
 303     def _find_entries_in_json(self, extracted):
 304         entries = []
 305         c = {}
 306
 307         def _real_find(obj):
 308             if obj is None or isinstance(obj, str):
 309                 return
 310
 311             if type(obj) is list:
 312                 for elem in obj:
 313                     _real_find(elem)
 314
 315             if type(obj) is dict:
 316                 if self._is_entry(obj):
 317                     entries.append(obj)
 318                     return
 319
 320                 if 'continuationCommand' in obj:
 321                     c['continuation'] = obj
 322                     return
 323
 324                 for _, o in obj.items():
 325                     _real_find(o)
 326
 327         _real_find(extracted)
 328
 329         return entries, try_get(c, lambda x: x["continuation"])
 330
 331     def _entries(self, page, playlist_id, max_pages=None):
 332         seen = []
 333
 334         yt_conf = {}
 335         for m in re.finditer(self._YTCFG_DATA_RE, page):
 336             parsed = self._parse_json(m.group(1), playlist_id,
 337                                       transform_source=js_to_json, fatal=False)
 338             if parsed:
 339                 yt_conf.update(parsed)
 340
 341         data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)
 342
 343         for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
 344             entries, continuation = self._find_entries_in_json(data_json)
 345             processed = self._process_entries(entries, seen)
 346
 347             if not processed:
 348                 break
 349             for entry in processed:
 350                 yield entry
 351
 352             if not continuation or not yt_conf:
 353                 break
 354             continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
 355             continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
 356             if not continuation_token or not continuation_url:
 357                 break
 358
 359             count = 0
 360             retries = 3
 361             while count <= retries:
 362                 try:
 363                     # Downloading page may result in intermittent 5xx HTTP error
 364                     # that is usually worked around with a retry
 365                     data_json = self._download_json(
 366                         'https://www.youtube.com%s' % continuation_url,
 367                         playlist_id,
 368                         'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
 369
 370                         transform_source=uppercase_escape,
 371                         query={
 372                             'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
 373                         },
 374                         data=str(json.dumps({
 375                             'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
 376                             'continuation': continuation_token
 377                         })).encode(encoding='UTF-8', errors='strict'),
 378                         headers={
 379                             'Content-Type': 'application/json'
 380                         }
 381                     )
 382                     break
 383                 except ExtractorError as e:
 384                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
 385                         count += 1
 386                         if count <= retries:
 387                             continue
 388                     raise
 389
 390     def _extract_title(self, renderer):
 391         title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
 392         if title:
 393             return title
 394         return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
 395
 396
 397 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 398     def _is_entry(self, obj):
 399         return 'videoId' in obj
 400
 401     def _process_entries(self, entries, seen):
 402         ids_in_page = []
 403         titles_in_page = []
 404         for renderer in entries:
 405             video_id = try_get(renderer, lambda x: x['videoId'])
 406             video_title = self._extract_title(renderer)
 407
 408             if video_id is None or video_title is None:
 409                 # we do not have a videoRenderer or title extraction broke
 410                 continue
 411
 412             video_title = video_title.strip()
 413
 414             try:
 415                 idx = ids_in_page.index(video_id)
 416                 if video_title and not titles_in_page[idx]:
 417                     titles_in_page[idx] = video_title
 418             except ValueError:
 419                 ids_in_page.append(video_id)
 420                 titles_in_page.append(video_title)
 421
 422         for video_id, video_title in zip(ids_in_page, titles_in_page):
 423             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 424
 425
 426 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 427     def _is_entry(self, obj):
 428         return 'playlistId' in obj
 429
 430     def _process_entries(self, entries, seen):
 431         for playlist_id in orderedSet(try_get(r, lambda x: x['playlistId']) for r in entries):
 432
 433             yield self.url_result(
 434                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 435
 436     def _real_extract(self, url):
 437         playlist_id = self._match_id(url)
 438         webpage = self._download_webpage(url, playlist_id)
 439         title = self._og_search_title(webpage, fatal=False)
 440         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 441
 442
 443 class YoutubeIE(YoutubeBaseInfoExtractor):
 444     IE_DESC = 'YouTube.com'
 445     _VALID_URL = r"""(?x)^
 446                      (
 447                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 448                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
 449                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 450                             (?:www\.)?pwnyoutube\.com/|
 451                             (?:www\.)?hooktube\.com/|
 452                             (?:www\.)?yourepeat\.com/|
 453                             tube\.majestyc\.net/|
 454                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
 455                             (?:(?:www|dev)\.)?invidio\.us/|
 456                             (?:(?:www|no)\.)?invidiou\.sh/|
 457                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
 458                             (?:www\.)?invidious\.kabi\.tk/|
 459                             (?:www\.)?invidious\.13ad\.de/|
 460                             (?:www\.)?invidious\.mastodon\.host/|
 461                             (?:www\.)?invidious\.nixnet\.xyz/|
 462                             (?:www\.)?invidious\.drycat\.fr/|
 463                             (?:www\.)?tube\.poal\.co/|
 464                             (?:www\.)?vid\.wxzm\.sx/|
 465                             (?:www\.)?yewtu\.be/|
 466                             (?:www\.)?yt\.elukerio\.org/|
 467                             (?:www\.)?yt\.lelux\.fi/|
 468                             (?:www\.)?invidious\.ggc-project\.de/|
 469                             (?:www\.)?yt\.maisputain\.ovh/|
 470                             (?:www\.)?invidious\.13ad\.de/|
 471                             (?:www\.)?invidious\.toot\.koeln/|
 472                             (?:www\.)?invidious\.fdn\.fr/|
 473                             (?:www\.)?watch\.nettohikari\.com/|
 474                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
 475                             (?:www\.)?qklhadlycap4cnod\.onion/|
 476                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
 477                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
 478                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
 479                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
 480                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
 481                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
 482                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 483                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 484                          (?:                                                  # the various things that can precede the ID:
 485                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 486                              |(?:                                             # or the v= param in all its forms
 487                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 488                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 489                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 490                                  v=
 491                              )
 492                          ))
 493                          |(?:
 494                             youtu\.be|                                        # just youtu.be/xxxx
 495                             vid\.plus|                                        # or vid.plus/xxxx
 496                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 497                          )/
 498                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 499                          )
 500                      )?                                                       # all until now is optional -> you can pass the naked ID
 501                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 502                      (?!.*?\blist=
 503                         (?:
 504                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 505                             WL                                                # WL are handled by the watch later IE
 506                         )
 507                      )
 508                      (?(1).+)?                                                # if we found the ID, everything can follow
 509                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 510     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 511     _PLAYER_INFO_RE = (
 512         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 513         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 514     )
 515     _formats = {
 516         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 517         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 518         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 519         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 520         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 521         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 522         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 523         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 524         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 525         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 526         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 527         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 528         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 529         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 530         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 531         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 532         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 533         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 534
 535
 536         # 3D videos
 537         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 538         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 539         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 540         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 541         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 542         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 543         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 544
 545         # Apple HTTP Live Streaming
 546         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 547         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 548         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 549         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 550         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 551         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 552         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 553         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 554
 555         # DASH mp4 video
 556         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 557         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 558         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 559         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 560         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 561         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 562         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 563         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 564         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 565         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 566         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 567         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 568
 569         # Dash mp4 audio
 570         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 571         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 572         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 573         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 574         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 575         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 576         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 577
 578         # Dash webm
 579         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 580         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 581         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 582         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 583         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 584         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 585         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 586         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 587         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 588         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 589         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 590         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 591         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 592         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 593         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 594         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 595         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 596         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 597         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 598         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 599         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 600         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 601
 602         # Dash webm audio
 603         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 604         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 605
 606         # Dash webm audio with opus inside
 607         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 608         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 609         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 610
 611         # RTMP (unnamed)
 612         '_rtmp': {'protocol': 'rtmp'},
 613
 614         # av01 video only formats sometimes served with "unknown" codecs
 615         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 616         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 617         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 618         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 619     }
    # Subtitle format identifiers.
    # NOTE(review): presumably the formats offered for each subtitle track --
    # confirm against the subtitle extraction code.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # NOTE(review): presumably opts this extractor out of the geo-bypass
    # handling of the base extractor -- confirm in InfoExtractor.
    _GEO_BYPASS = False

    # NOTE(review): presumably the name this extractor is listed under -- confirm.
    IE_NAME = 'youtube'
 625     _TESTS = [
 626         {
 627             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 628             'info_dict': {
 629                 'id': 'BaW_jenozKc',
 630                 'ext': 'mp4',
 631                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 632                 'uploader': 'Philipp Hagemeister',
 633                 'uploader_id': 'phihag',
 634                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 635                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 636                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 637                 'upload_date': '20121002',
 638                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 639                 'categories': ['Science & Technology'],
 640                 'tags': ['youtube-dl'],
 641                 'duration': 10,
 642                 'view_count': int,
 643                 'like_count': int,
 644                 'dislike_count': int,
 645                 'start_time': 1,
 646                 'end_time': 9,
 647             }
 648         },
 649         {
 650             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 651             'note': 'Embed-only video (#1746)',
 652             'info_dict': {
 653                 'id': 'yZIXLfi8CZQ',
 654                 'ext': 'mp4',
 655                 'upload_date': '20120608',
 656                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 657                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 658                 'uploader': 'SET India',
 659                 'uploader_id': 'setindia',
 660                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 661                 'age_limit': 18,
 662             }
 663         },
 664         {
 665             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 666             'note': 'Use the first video ID in the URL',
 667             'info_dict': {
 668                 'id': 'BaW_jenozKc',
 669                 'ext': 'mp4',
 670                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 671                 'uploader': 'Philipp Hagemeister',
 672                 'uploader_id': 'phihag',
 673                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 674                 'upload_date': '20121002',
 675                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 676                 'categories': ['Science & Technology'],
 677                 'tags': ['youtube-dl'],
 678                 'duration': 10,
 679                 'view_count': int,
 680                 'like_count': int,
 681                 'dislike_count': int,
 682             },
 683             'params': {
 684                 'skip_download': True,
 685             },
 686         },
 687         {
 688             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 689             'note': '256k DASH audio (format 141) via DASH manifest',
 690             'info_dict': {
 691                 'id': 'a9LDPn-MO4I',
 692                 'ext': 'm4a',
 693                 'upload_date': '20121002',
 694                 'uploader_id': '8KVIDEO',
 695                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 696                 'description': '',
 697                 'uploader': '8KVIDEO',
 698                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 699             },
 700             'params': {
 701                 'youtube_include_dash_manifest': True,
 702                 'format': '141',
 703             },
 704             'skip': 'format 141 not served anymore',
 705         },
 706         # Controversy video
 707         {
 708             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 709             'info_dict': {
 710                 'id': 'T4XJQO3qol8',
 711                 'ext': 'mp4',
 712                 'duration': 219,
 713                 'upload_date': '20100909',
 714                 'uploader': 'Amazing Atheist',
 715                 'uploader_id': 'TheAmazingAtheist',
 716                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 717                 'title': 'Burning Everyone\'s Koran',
 718                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 719             }
 720         },
 721         # Normal age-gate video (embed allowed)
 722         {
 723             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 724             'info_dict': {
 725                 'id': 'HtVdAasjOgU',
 726                 'ext': 'mp4',
 727                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 728                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 729                 'duration': 142,
 730                 'uploader': 'The Witcher',
 731                 'uploader_id': 'WitcherGame',
 732                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 733                 'upload_date': '20140605',
 734                 'age_limit': 18,
 735             },
 736         },
 737         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 738         {
 739             'url': 'lqQg6PlCWgI',
 740             'info_dict': {
 741                 'id': 'lqQg6PlCWgI',
 742                 'ext': 'mp4',
 743                 'duration': 6085,
 744                 'upload_date': '20150827',
 745                 'uploader_id': 'olympic',
 746                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 747                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 748                 'uploader': 'Olympic',
 749                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 750             },
 751             'params': {
 752                 'skip_download': 'requires avconv',
 753             }
 754         },
 755         # Non-square pixels
 756         {
 757             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 758             'info_dict': {
 759                 'id': '_b-2C3KPAM0',
 760                 'ext': 'mp4',
 761                 'stretched_ratio': 16 / 9.,
 762                 'duration': 85,
 763                 'upload_date': '20110310',
 764                 'uploader_id': 'AllenMeow',
 765                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 766                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 767                 'uploader': '孫ᄋᄅ',
 768                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 769             },
 770         },
 771         # url_encoded_fmt_stream_map is empty string
 772         {
 773             'url': 'qEJwOuvDf7I',
 774             'info_dict': {
 775                 'id': 'qEJwOuvDf7I',
 776                 'ext': 'webm',
 777                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 778                 'description': '',
 779                 'upload_date': '20150404',
 780                 'uploader_id': 'spbelect',
 781                 'uploader': 'Наблюдатели Петербурга',
 782             },
 783             'params': {
 784                 'skip_download': 'requires avconv',
 785             },
 786             'skip': 'This live event has ended.',
 787         },
 788         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 789         {
 790             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 791             'info_dict': {
 792                 'id': 'FIl7x6_3R5Y',
 793                 'ext': 'webm',
 794                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 795                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 796                 'duration': 220,
 797                 'upload_date': '20150625',
 798                 'uploader_id': 'dorappi2000',
 799                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 800                 'uploader': 'dorappi2000',
 801                 'formats': 'mincount:31',
 802             },
 803             'skip': 'not actual anymore',
 804         },
 805         # DASH manifest with segment_list
 806         {
 807             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 808             'md5': '8ce563a1d667b599d21064e982ab9e31',
 809             'info_dict': {
 810                 'id': 'CsmdDsKjzN8',
 811                 'ext': 'mp4',
 812                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 813                 'uploader': 'Airtek',
 814                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 815                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 816                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 817             },
 818             'params': {
 819                 'youtube_include_dash_manifest': True,
 820                 'format': '135',  # bestvideo
 821             },
 822             'skip': 'This live event has ended.',
 823         },
 824         {
 825             # Multifeed videos (multiple cameras), URL is for Main Camera
 826             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 827             'info_dict': {
 828                 'id': 'jqWvoWXjCVs',
 829                 'title': 'teamPGP: Rocket League Noob Stream',
 830                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 831             },
 832             'playlist': [{
 833                 'info_dict': {
 834                     'id': 'jqWvoWXjCVs',
 835                     'ext': 'mp4',
 836                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 837                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 838                     'duration': 7335,
 839                     'upload_date': '20150721',
 840                     'uploader': 'Beer Games Beer',
 841                     'uploader_id': 'beergamesbeer',
 842                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 843                     'license': 'Standard YouTube License',
 844                 },
 845             }, {
 846                 'info_dict': {
 847                     'id': '6h8e8xoXJzg',
 848                     'ext': 'mp4',
 849                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 850                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 851                     'duration': 7337,
 852                     'upload_date': '20150721',
 853                     'uploader': 'Beer Games Beer',
 854                     'uploader_id': 'beergamesbeer',
 855                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 856                     'license': 'Standard YouTube License',
 857                 },
 858             }, {
 859                 'info_dict': {
 860                     'id': 'PUOgX5z9xZw',
 861                     'ext': 'mp4',
 862                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 863                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 864                     'duration': 7337,
 865                     'upload_date': '20150721',
 866                     'uploader': 'Beer Games Beer',
 867                     'uploader_id': 'beergamesbeer',
 868                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 869                     'license': 'Standard YouTube License',
 870                 },
 871             }, {
 872                 'info_dict': {
 873                     'id': 'teuwxikvS5k',
 874                     'ext': 'mp4',
 875                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 876                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 877                     'duration': 7334,
 878                     'upload_date': '20150721',
 879                     'uploader': 'Beer Games Beer',
 880                     'uploader_id': 'beergamesbeer',
 881                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 882                     'license': 'Standard YouTube License',
 883                 },
 884             }],
 885             'params': {
 886                 'skip_download': True,
 887             },
 888             'skip': 'This video is not available.',
 889         },
 890         {
 891             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 892             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 893             'info_dict': {
 894                 'id': 'gVfLd0zydlo',
 895                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 896             },
 897             'playlist_count': 2,
 898             'skip': 'Not multifeed anymore',
 899         },
 900         {
 901             'url': 'https://vid.plus/FlRa-iH7PGw',
 902             'only_matching': True,
 903         },
 904         {
 905             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 906             'only_matching': True,
 907         },
 908         {
 909             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 910             # Also tests cut-off URL expansion in video description (see
 911             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 912             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 913             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 914             'info_dict': {
 915                 'id': 'lsguqyKfVQg',
 916                 'ext': 'mp4',
 917                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 918                 'alt_title': 'Dark Walk - Position Music',
 919                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 920                 'duration': 133,
 921                 'upload_date': '20151119',
 922                 'uploader_id': 'IronSoulElf',
 923                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 924                 'uploader': 'IronSoulElf',
 925                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 926                 'track': 'Dark Walk - Position Music',
 927                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 928                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
 929             },
 930             'params': {
 931                 'skip_download': True,
 932             },
 933         },
 934         {
 935             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 936             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 937             'only_matching': True,
 938         },
 939         {
 940             # Video with yt:stretch=17:0
 941             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 942             'info_dict': {
 943                 'id': 'Q39EVAstoRM',
 944                 'ext': 'mp4',
 945                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 946                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 947                 'upload_date': '20151107',
 948                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 949                 'uploader': 'CH GAMER DROID',
 950             },
 951             'params': {
 952                 'skip_download': True,
 953             },
 954             'skip': 'This video does not exist.',
 955         },
 956         {
 957             # Video licensed under Creative Commons
 958             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 959             'info_dict': {
 960                 'id': 'M4gD1WSo5mA',
 961                 'ext': 'mp4',
 962                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 963                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 964                 'duration': 721,
 965                 'upload_date': '20150127',
 966                 'uploader_id': 'BerkmanCenter',
 967                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 968                 'uploader': 'The Berkman Klein Center for Internet & Society',
 969                 'license': 'Creative Commons Attribution license (reuse allowed)',
 970             },
 971             'params': {
 972                 'skip_download': True,
 973             },
 974         },
 975         {
 976             # Channel-like uploader_url
 977             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 978             'info_dict': {
 979                 'id': 'eQcmzGIKrzg',
 980                 'ext': 'mp4',
 981                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 982                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 983                 'duration': 4060,
 984                 'upload_date': '20151119',
 985                 'uploader': 'Bernie Sanders',
 986                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 987                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 988                 'license': 'Creative Commons Attribution license (reuse allowed)',
 989             },
 990             'params': {
 991                 'skip_download': True,
 992             },
 993         },
 994         {
 995             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 996             'only_matching': True,
 997         },
 998         {
 999             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1000             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1001             'only_matching': True,
1002         },
1003         {
1004             # Rental video preview
1005             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1006             'info_dict': {
1007                 'id': 'uGpuVWrhIzE',
1008                 'ext': 'mp4',
1009                 'title': 'Piku - Trailer',
1010                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1011                 'upload_date': '20150811',
1012                 'uploader': 'FlixMatrix',
1013                 'uploader_id': 'FlixMatrixKaravan',
1014                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1015                 'license': 'Standard YouTube License',
1016             },
1017             'params': {
1018                 'skip_download': True,
1019             },
1020             'skip': 'This video is not available.',
1021         },
1022         {
1023             # YouTube Red video with episode data
1024             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1025             'info_dict': {
1026                 'id': 'iqKdEhx-dD4',
1027                 'ext': 'mp4',
1028                 'title': 'Isolation - Mind Field (Ep 1)',
1029                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1030                 'duration': 2085,
1031                 'upload_date': '20170118',
1032                 'uploader': 'Vsauce',
1033                 'uploader_id': 'Vsauce',
1034                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1035                 'series': 'Mind Field',
1036                 'season_number': 1,
1037                 'episode_number': 1,
1038             },
1039             'params': {
1040                 'skip_download': True,
1041             },
1042             'expected_warnings': [
1043                 'Skipping DASH manifest',
1044             ],
1045         },
1046         {
1047             # The following content has been identified by the YouTube community
1048             # as inappropriate or offensive to some audiences.
1049             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1050             'info_dict': {
1051                 'id': '6SJNVb0GnPI',
1052                 'ext': 'mp4',
1053                 'title': 'Race Differences in Intelligence',
1054                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1055                 'duration': 965,
1056                 'upload_date': '20140124',
1057                 'uploader': 'New Century Foundation',
1058                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1059                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1060             },
1061             'params': {
1062                 'skip_download': True,
1063             },
1064         },
1065         {
1066             # itag 212
1067             'url': '1t24XAntNCY',
1068             'only_matching': True,
1069         },
1070         {
1071             # geo restricted to JP
1072             'url': 'sJL6WA-aGkQ',
1073             'only_matching': True,
1074         },
1075         {
1076             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1077             'only_matching': True,
1078         },
1079         {
1080             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1081             'only_matching': True,
1082         },
1083         {
1084             # DRM protected
1085             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1086             'only_matching': True,
1087         },
1088         {
1089             # Video with unsupported adaptive stream type formats
1090             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1091             'info_dict': {
1092                 'id': 'Z4Vy8R84T1U',
1093                 'ext': 'mp4',
1094                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1095                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1096                 'duration': 433,
1097                 'upload_date': '20130923',
1098                 'uploader': 'Amelia Putri Harwita',
1099                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1100                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1101                 'formats': 'maxcount:10',
1102             },
1103             'params': {
1104                 'skip_download': True,
1105                 'youtube_include_dash_manifest': False,
1106             },
1107             'skip': 'not actual anymore',
1108         },
1109         {
1110             # Youtube Music Auto-generated description
1111             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1112             'info_dict': {
1113                 'id': 'MgNrAu2pzNs',
1114                 'ext': 'mp4',
1115                 'title': 'Voyeur Girl',
1116                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1117                 'upload_date': '20190312',
1118                 'uploader': 'Stephen - Topic',
1119                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1120                 'artist': 'Stephen',
1121                 'track': 'Voyeur Girl',
1122                 'album': 'it\'s too much love to know my dear',
1123                 'release_date': '20190313',
1124                 'release_year': 2019,
1125             },
1126             'params': {
1127                 'skip_download': True,
1128             },
1129         },
1130         {
1131             # Youtube Music Auto-generated description
1132             # Retrieve 'artist' field from 'Artist:' in video description
1133             # when it is present on youtube music video
1134             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1135             'info_dict': {
1136                 'id': 'k0jLE7tTwjY',
1137                 'ext': 'mp4',
1138                 'title': 'Latch Feat. Sam Smith',
1139                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1140                 'upload_date': '20150110',
1141                 'uploader': 'Various Artists - Topic',
1142                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1143                 'artist': 'Disclosure',
1144                 'track': 'Latch Feat. Sam Smith',
1145                 'album': 'Latch Featuring Sam Smith',
1146                 'release_date': '20121008',
1147                 'release_year': 2012,
1148             },
1149             'params': {
1150                 'skip_download': True,
1151             },
1152         },
1153         {
1154             # Youtube Music Auto-generated description
1155             # handle multiple artists on youtube music video
1156             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1157             'info_dict': {
1158                 'id': '74qn0eJSjpA',
1159                 'ext': 'mp4',
1160                 'title': 'Eastside',
1161                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1162                 'upload_date': '20180710',
1163                 'uploader': 'Benny Blanco - Topic',
1164                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1165                 'artist': 'benny blanco, Halsey, Khalid',
1166                 'track': 'Eastside',
1167                 'album': 'Eastside',
1168                 'release_date': '20180713',
1169                 'release_year': 2018,
1170             },
1171             'params': {
1172                 'skip_download': True,
1173             },
1174         },
1175         {
1176             # Youtube Music Auto-generated description
1177             # handle youtube music video with release_year and no release_date
1178             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1179             'info_dict': {
1180                 'id': '-hcAI0g-f5M',
1181                 'ext': 'mp4',
1182                 'title': 'Put It On Me',
1183                 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1184                 'upload_date': '20180426',
1185                 'uploader': 'Matt Maeson - Topic',
1186                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1187                 'artist': 'Matt Maeson',
1188                 'track': 'Put It On Me',
1189                 'album': 'The Hearse',
1190                 'release_date': None,
1191                 'release_year': 2018,
1192             },
1193             'params': {
1194                 'skip_download': True,
1195             },
1196         },
1197         {
1198             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1199             'only_matching': True,
1200         },
1201         {
1202             # invalid -> valid video id redirection
1203             'url': 'DJztXj2GPfl',
1204             'info_dict': {
1205                 'id': 'DJztXj2GPfk',
1206                 'ext': 'mp4',
1207                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1208                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1209                 'upload_date': '20090125',
1210                 'uploader': 'Prochorowka',
1211                 'uploader_id': 'Prochorowka',
1212                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1213                 'artist': 'Panjabi MC',
1214                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1215                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1216             },
1217             'params': {
1218                 'skip_download': True,
1219             },
1220         },
1221         {
1222             # empty description results in an empty string
1223             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1224             'info_dict': {
1225                 'id': 'x41yOUIvK2k',
1226                 'ext': 'mp4',
1227                 'title': 'IMG 3456',
1228                 'description': '',
1229                 'upload_date': '20170613',
1230                 'uploader_id': 'ElevageOrVert',
1231                 'uploader': 'ElevageOrVert',
1232             },
1233             'params': {
1234                 'skip_download': True,
1235             },
1236         },
1237     ]
1238
1239     def __init__(self, *args, **kwargs):
1240         super(YoutubeIE, self).__init__(*args, **kwargs)
1241         self._player_cache = {}
1242
1243     def report_video_info_webpage_download(self, video_id):
1244         """Report attempt to download video info webpage."""
1245         self.to_screen('%s: Downloading video info webpage' % video_id)
1246
1247     def report_information_extraction(self, video_id):
1248         """Report attempt to extract video information."""
1249         self.to_screen('%s: Extracting video information' % video_id)
1250
1251     def report_unavailable_format(self, video_id, format):
1252         """Report extracted video URL."""
1253         self.to_screen('%s: Format %s not available' % (video_id, format))
1254
1255     def report_rtmp_download(self):
1256         """Indicate the download will use the RTMP protocol."""
1257         self.to_screen('RTMP download detected')
1258
1259     def _signature_cache_id(self, example_sig):
1260         """ Return a string representation of a signature """
1261         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1262
1263     @classmethod
1264     def _extract_player_info(cls, player_url):
1265         for player_re in cls._PLAYER_INFO_RE:
1266             id_m = re.search(player_re, player_url)
1267             if id_m:
1268                 break
1269         else:
1270             raise ExtractorError('Cannot identify player %r' % player_url)
1271         return id_m.group('ext'), id_m.group('id')
1272
1273     def _extract_signature_function(self, video_id, player_url, example_sig):
1274         player_type, player_id = self._extract_player_info(player_url)
1275
1276         # Read from filesystem cache
1277         func_id = '%s_%s_%s' % (
1278             player_type, player_id, self._signature_cache_id(example_sig))
1279         assert os.path.basename(func_id) == func_id
1280
1281         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1282         if cache_spec is not None:
1283             return lambda s: ''.join(s[i] for i in cache_spec)
1284
1285         download_note = (
1286             'Downloading player %s' % player_url
1287             if self._downloader.params.get('verbose') else
1288             'Downloading %s player %s' % (player_type, player_id)
1289         )
1290         if player_type == 'js':
1291             code = self._download_webpage(
1292                 player_url, video_id,
1293                 note=download_note,
1294                 errnote='Download of %s failed' % player_url)
1295             res = self._parse_sig_js(code)
1296         elif player_type == 'swf':
1297             urlh = self._request_webpage(
1298                 player_url, video_id,
1299                 note=download_note,
1300                 errnote='Download of %s failed' % player_url)
1301             code = urlh.read()
1302             res = self._parse_sig_swf(code)
1303         else:
1304             assert False, 'Invalid player type %r' % player_type
1305
1306         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1307         cache_res = res(test_string)
1308         cache_spec = [ord(c) for c in cache_res]
1309
1310         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1311         return res
1312
    def _print_sig_code(self, func, example_sig):
        """Print Python code equivalent to the signature function func.

        func is applied to a probe string as long as example_sig; the
        permutation it performs is rendered as a sum of indexing and slicing
        expressions on 's', guarded by a check of the signature part lengths,
        and written out through to_screen.
        """
        def gen_sig_code(idxs):
            # Yields 's[...]' expressions for idxs, merging runs of indices
            # that go up or down by one into a single slice
            def _genslice(start, end, step):
                # Render the slice from start through end (inclusive)
                starts = '' if start == 0 else str(start)
                # The stop of a slice is exclusive, hence end + step; a
                # negative stop would not mean "past index 0", so the slice is
                # left open-ended instead
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # step is None while outside a run, otherwise the run's direction
            step = None
            # Squelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        # The run continues
                        continue
                    # The run ended at prev
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new run starts at prev
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever is still pending for the last index
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # A string of distinct characters: the code points of the output tell
        # which input index ended up at each position
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1351
1352     def _parse_sig_js(self, jscode):
1353         funcname = self._search_regex(
1354             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1355              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1356              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1357              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1358              # Obsolete patterns
1359              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1360              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1361              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1362              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1363              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1364              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1365              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1366              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1367             jscode, 'Initial JS player signature function name', group='sig')
1368
1369         jsi = JSInterpreter(jscode)
1370         initial_function = jsi.extract_function(funcname)
1371         return lambda s: initial_function([s])
1372
1373     def _parse_sig_swf(self, file_contents):
1374         swfi = SWFInterpreter(file_contents)
1375         TARGET_CLASSNAME = 'SignatureDecipher'
1376         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1377         initial_function = swfi.extract_function(searched_class, 'decipher')
1378         return lambda s: initial_function([s])
1379
1380     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1381         """Turn the encrypted s field into a working signature"""
1382
1383         if player_url is None:
1384             raise ExtractorError('Cannot decrypt signature without player_url')
1385
1386         if player_url.startswith('//'):
1387             player_url = 'https:' + player_url
1388         elif not re.match(r'https?://', player_url):
1389             player_url = compat_urlparse.urljoin(
1390                 'https://www.youtube.com', player_url)
1391         try:
1392             player_id = (player_url, self._signature_cache_id(s))
1393             if player_id not in self._player_cache:
1394                 func = self._extract_signature_function(
1395                     video_id, player_url, s
1396                 )
1397                 self._player_cache[player_id] = func
1398             func = self._player_cache[player_id]
1399             if self._downloader.params.get('youtube_print_sig_code'):
1400                 self._print_sig_code(func, s)
1401             return func(s)
1402         except Exception as e:
1403             tb = traceback.format_exc()
1404             raise ExtractorError(
1405                 'Signature extraction failed: ' + tb, cause=e)
1406
1407     def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1408         try:
1409             subs_doc = self._download_xml(
1410                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1411                 video_id, note=False)
1412         except ExtractorError as err:
1413             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1414             return {}
1415
1416         sub_lang_list = {}
1417         for track in subs_doc.findall('track'):
1418             lang = track.attrib['lang_code']
1419             if lang in sub_lang_list:
1420                 continue
1421             sub_formats = []
1422             for ext in self._SUBTITLE_FORMATS:
1423                 params = compat_urllib_parse_urlencode({
1424                     'lang': lang,
1425                     'v': video_id,
1426                     'fmt': ext,
1427                     'name': track.attrib['name'].encode('utf-8'),
1428                 })
1429                 sub_formats.append({
1430                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1431                     'ext': ext,
1432                 })
1433             sub_lang_list[lang] = sub_formats
1434         if has_live_chat_replay:
1435             sub_lang_list['live_chat'] = [
1436                 {
1437                     'video_id': video_id,
1438                     'ext': 'json',
1439                     'protocol': 'youtube_live_chat_replay',
1440                 },
1441             ]
1442         if not sub_lang_list:
1443             self._downloader.report_warning('video doesn\'t have subtitles')
1444             return {}
1445         return sub_lang_list
1446
1447     def _get_ytplayer_config(self, video_id, webpage):
1448         patterns = (
1449             # User data may contain arbitrary character sequences that may affect
1450             # JSON extraction with regex, e.g. when '};' is contained the second
1451             # regex won't capture the whole JSON. Yet working around by trying more
1452             # concrete regex first keeping in mind proper quoted string handling
1453             # to be implemented in future that will replace this workaround (see
1454             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1455             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1456             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1457             r';ytplayer\.config\s*=\s*({.+?});',
1458             r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
1459         )
1460         config = self._search_regex(
1461             patterns, webpage, 'ytplayer.config', default=None)
1462         if config:
1463             return self._parse_json(
1464                 uppercase_escape(config), video_id, fatal=False)
1465
1466     def _get_music_metadata_from_yt_initial(self, yt_initial):
1467         music_metadata = []
1468         key_map = {
1469             'Album': 'album',
1470             'Artist': 'artist',
1471             'Song': 'track'
1472         }
1473         contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1474         if type(contents) is list:
1475             for content in contents:
1476                 music_track = {}
1477                 if type(content) is not dict:
1478                     continue
1479                 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1480                 if type(videoSecondaryInfoRenderer) is not dict:
1481                     continue
1482                 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1483                 if type(rows) is not list:
1484                     continue
1485                 for row in rows:
1486                     metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1487                     if type(metadataRowRenderer) is not dict:
1488                         continue
1489                     key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1490                     value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1491                         try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1492                     if type(key) is not str or type(value) is not str:
1493                         continue
1494                     if key in key_map:
1495                         if key_map[key] in music_track:
1496                             # we've started on a new track
1497                             music_metadata.append(music_track)
1498                             music_track = {}
1499                         music_track[key_map[key]] = value
1500                 if len(music_track.keys()):
1501                     music_metadata.append(music_track)
1502         return music_metadata
1503
1504     def _get_automatic_captions(self, video_id, webpage):
1505         """We need the webpage for getting the captions url, pass it as an
1506            argument to speed up the process."""
1507         self.to_screen('%s: Looking for automatic captions' % video_id)
1508         player_config = self._get_ytplayer_config(video_id, webpage)
1509         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1510         if not player_config:
1511             self._downloader.report_warning(err_msg)
1512             return {}
1513         try:
1514             if "args" in player_config and "ttsurl" in player_config["args"]:
1515                 args = player_config['args']
1516                 caption_url = args['ttsurl']
1517                 timestamp = args['timestamp']
1518
1519                 # We get the available subtitles
1520                 list_params = compat_urllib_parse_urlencode({
1521                     'type': 'list',
1522                     'tlangs': 1,
1523                     'asrs': 1,
1524                 })
1525                 list_url = caption_url + '&' + list_params
1526                 caption_list = self._download_xml(list_url, video_id)
1527                 original_lang_node = caption_list.find('track')
1528                 if original_lang_node is None:
1529                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1530                     return {}
1531                 original_lang = original_lang_node.attrib['lang_code']
1532                 caption_kind = original_lang_node.attrib.get('kind', '')
1533
1534                 sub_lang_list = {}
1535                 for lang_node in caption_list.findall('target'):
1536                     sub_lang = lang_node.attrib['lang_code']
1537                     sub_formats = []
1538                     for ext in self._SUBTITLE_FORMATS:
1539                         params = compat_urllib_parse_urlencode({
1540                             'lang': original_lang,
1541                             'tlang': sub_lang,
1542                             'fmt': ext,
1543                             'ts': timestamp,
1544                             'kind': caption_kind,
1545                         })
1546                         sub_formats.append({
1547                             'url': caption_url + '&' + params,
1548                             'ext': ext,
1549                         })
1550                     sub_lang_list[sub_lang] = sub_formats
1551                 return sub_lang_list
1552
1553             def make_captions(sub_url, sub_langs):
1554                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1555                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1556                 captions = {}
1557                 for sub_lang in sub_langs:
1558                     sub_formats = []
1559                     for ext in self._SUBTITLE_FORMATS:
1560                         caption_qs.update({
1561                             'tlang': [sub_lang],
1562                             'fmt': [ext],
1563                         })
1564                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1565                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1566                         sub_formats.append({
1567                             'url': sub_url,
1568                             'ext': ext,
1569                         })
1570                     captions[sub_lang] = sub_formats
1571                 return captions
1572
1573             # New captions format as of 22.06.2017
1574             if "args" in player_config:
1575                 player_response = player_config["args"].get('player_response')
1576             else:
1577                 # New player system (ytInitialPlayerResponse) as of October 2020
1578                 player_response = player_config
1579
1580             if player_response:
1581                 if isinstance(player_response, compat_str):
1582                     player_response = self._parse_json(
1583                         player_response, video_id, fatal=False)
1584
1585                 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1586                 caption_tracks = renderer['captionTracks']
1587                 for caption_track in caption_tracks:
1588                     if 'kind' not in caption_track:
1589                         # not an automatic transcription
1590                         continue
1591                     base_url = caption_track['baseUrl']
1592                     sub_lang_list = []
1593                     for lang in renderer['translationLanguages']:
1594                         lang_code = lang.get('languageCode')
1595                         if lang_code:
1596                             sub_lang_list.append(lang_code)
1597                     return make_captions(base_url, sub_lang_list)
1598
1599                 self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
1600                 return {}
1601
1602             if "args" in player_config:
1603                 args = player_config["args"]
1604
1605                 # Some videos don't provide ttsurl but rather caption_tracks and
1606                 # caption_translation_languages (e.g. 20LmZk1hakA)
1607                 # Does not used anymore as of 22.06.2017
1608                 caption_tracks = args['caption_tracks']
1609                 caption_translation_languages = args['caption_translation_languages']
1610                 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1611                 sub_lang_list = []
1612                 for lang in caption_translation_languages.split(','):
1613                     lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1614                     sub_lang = lang_qs.get('lc', [None])[0]
1615                     if sub_lang:
1616                         sub_lang_list.append(sub_lang)
1617                 return make_captions(caption_url, sub_lang_list)
1618         # An extractor error can be raise by the download process if there are
1619         # no automatic captions but there are subtitles
1620         except (KeyError, IndexError, ExtractorError):
1621             self._downloader.report_warning(err_msg)
1622             return {}
1623
1624     def _mark_watched(self, video_id, video_info, player_response):
1625         playback_url = url_or_none(try_get(
1626             player_response,
1627             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1628             video_info, lambda x: x['videostats_playback_base_url'][0]))
1629         if not playback_url:
1630             return
1631         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1632         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1633
1634         # cpn generation algorithm is reverse engineered from base.js.
1635         # In fact it works even with dummy cpn.
1636         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1637         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1638
1639         qs.update({
1640             'ver': ['2'],
1641             'cpn': [cpn],
1642         })
1643         playback_url = compat_urlparse.urlunparse(
1644             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1645
1646         self._download_webpage(
1647             playback_url, video_id, 'Marking watched',
1648             'Unable to mark watched', fatal=False)
1649
1650     @staticmethod
1651     def _extract_urls(webpage):
1652         # Embedded YouTube player
1653         entries = [
1654             unescapeHTML(mobj.group('url'))
1655             for mobj in re.finditer(r'''(?x)
1656             (?:
1657                 <iframe[^>]+?src=|
1658                 data-video-url=|
1659                 <embed[^>]+?src=|
1660                 embedSWF\(?:\s*|
1661                 <object[^>]+data=|
1662                 new\s+SWFObject\(
1663             )
1664             (["\'])
1665                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1666                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1667             \1''', webpage)]
1668
1669         # lazyYT YouTube embed
1670         entries.extend(list(map(
1671             unescapeHTML,
1672             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1673
1674         # Wordpress "YouTube Video Importer" plugin
1675         matches = re.findall(r'''(?x)<div[^>]+
1676             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1677             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1678         entries.extend(m[-1] for m in matches)
1679
1680         return entries
1681
1682     @staticmethod
1683     def _extract_url(webpage):
1684         urls = YoutubeIE._extract_urls(webpage)
1685         return urls[0] if urls else None
1686
1687     @classmethod
1688     def extract_id(cls, url):
1689         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1690         if mobj is None:
1691             raise ExtractorError('Invalid URL: %s' % url)
1692         video_id = mobj.group(2)
1693         return video_id
1694
1695     def _extract_chapters_from_json(self, webpage, video_id, duration):
1696         if not webpage:
1697             return
1698         initial_data = self._parse_json(
1699             self._search_regex(
1700                 r'window\["ytInitialData"\] = (.+);\n', webpage,
1701                 'player args', default='{}'),
1702             video_id, fatal=False)
1703         if not initial_data or not isinstance(initial_data, dict):
1704             return
1705         chapters_list = try_get(
1706             initial_data,
1707             lambda x: x['playerOverlays']
1708                        ['playerOverlayRenderer']
1709                        ['decoratedPlayerBarRenderer']
1710                        ['decoratedPlayerBarRenderer']
1711                        ['playerBar']
1712                        ['chapteredPlayerBarRenderer']
1713                        ['chapters'],
1714             list)
1715         if not chapters_list:
1716             return
1717
1718         def chapter_time(chapter):
1719             return float_or_none(
1720                 try_get(
1721                     chapter,
1722                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1723                     int),
1724                 scale=1000)
1725         chapters = []
1726         for next_num, chapter in enumerate(chapters_list, start=1):
1727             start_time = chapter_time(chapter)
1728             if start_time is None:
1729                 continue
1730             end_time = (chapter_time(chapters_list[next_num])
1731                         if next_num < len(chapters_list) else duration)
1732             if end_time is None:
1733                 continue
1734             title = try_get(
1735                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1736                 compat_str)
1737             chapters.append({
1738                 'start_time': start_time,
1739                 'end_time': end_time,
1740                 'title': title,
1741             })
1742         return chapters
1743
1744     @staticmethod
1745     def _extract_chapters_from_description(description, duration):
1746         if not description:
1747             return None
1748         chapter_lines = re.findall(
1749             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1750             description)
1751         if not chapter_lines:
1752             return None
1753         chapters = []
1754         for next_num, (chapter_line, time_point) in enumerate(
1755                 chapter_lines, start=1):
1756             start_time = parse_duration(time_point)
1757             if start_time is None:
1758                 continue
1759             if start_time > duration:
1760                 break
1761             end_time = (duration if next_num == len(chapter_lines)
1762                         else parse_duration(chapter_lines[next_num][1]))
1763             if end_time is None:
1764                 continue
1765             if end_time > duration:
1766                 end_time = duration
1767             if start_time > end_time:
1768                 break
1769             chapter_title = re.sub(
1770                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1771             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1772             chapters.append({
1773                 'start_time': start_time,
1774                 'end_time': end_time,
1775                 'title': chapter_title,
1776             })
1777         return chapters
1778
1779     def _extract_chapters(self, webpage, description, video_id, duration):
1780         return (self._extract_chapters_from_json(webpage, video_id, duration)
1781                 or self._extract_chapters_from_description(description, duration))
1782
1783     def _real_extract(self, url):
1784         url, smuggled_data = unsmuggle_url(url, {})
1785
1786         proto = (
1787             'http' if self._downloader.params.get('prefer_insecure', False)
1788             else 'https')
1789
1790         start_time = None
1791         end_time = None
1792         parsed_url = compat_urllib_parse_urlparse(url)
1793         for component in [parsed_url.fragment, parsed_url.query]:
1794             query = compat_parse_qs(component)
1795             if start_time is None and 't' in query:
1796                 start_time = parse_duration(query['t'][0])
1797             if start_time is None and 'start' in query:
1798                 start_time = parse_duration(query['start'][0])
1799             if end_time is None and 'end' in query:
1800                 end_time = parse_duration(query['end'][0])
1801
1802         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1803         mobj = re.search(self._NEXT_URL_RE, url)
1804         if mobj:
1805             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1806         video_id = self.extract_id(url)
1807
1808         # Get video webpage
1809         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1810         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1811
1812         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1813         video_id = qs.get('v', [None])[0] or video_id
1814
1815         # Attempt to extract SWF player URL
1816         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1817         if mobj is not None:
1818             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1819         else:
1820             player_url = None
1821
1822         dash_mpds = []
1823
1824         def add_dash_mpd(video_info):
1825             dash_mpd = video_info.get('dashmpd')
1826             if dash_mpd and dash_mpd[0] not in dash_mpds:
1827                 dash_mpds.append(dash_mpd[0])
1828
1829         def add_dash_mpd_pr(pl_response):
1830             dash_mpd = url_or_none(try_get(
1831                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1832                 compat_str))
1833             if dash_mpd and dash_mpd not in dash_mpds:
1834                 dash_mpds.append(dash_mpd)
1835
1836         is_live = None
1837         view_count = None
1838
1839         def extract_view_count(v_info):
1840             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1841
1842         def extract_player_response(player_response, video_id):
1843             pl_response = str_or_none(player_response)
1844             if not pl_response:
1845                 return
1846             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1847             if isinstance(pl_response, dict):
1848                 add_dash_mpd_pr(pl_response)
1849                 return pl_response
1850
1851         def extract_embedded_config(embed_webpage, video_id):
1852             embedded_config = self._search_regex(
1853                 r'setConfig\(({.*})\);',
1854                 embed_webpage, 'ytInitialData', default=None)
1855             if embedded_config:
1856                 return embedded_config
1857
1858         player_response = {}
1859
1860         # Get video info
1861         video_info = {}
1862         embed_webpage = None
1863         if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1864                 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1865             cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1866             age_gate = True
1867             # We simulate the access to the video from www.youtube.com/v/{video_id}
1868             # this can be viewed without login into Youtube
1869             url = proto + '://www.youtube.com/embed/%s' % video_id
1870             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1871             ext = extract_embedded_config(embed_webpage, video_id)
1872             # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1873             playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1874             if not playable_in_embed:
1875                 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1876                 playable_in_embed = ''
1877             else:
1878                 playable_in_embed = playable_in_embed.group('playableinEmbed')
1879             # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1880             # if re.search(r'player-unavailable">', embed_webpage) is not None:
1881             if playable_in_embed == 'false':
1882                 '''
1883                 # TODO apply this patch when Support for Python 2.6(!) and above drops
1884                 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1885                         or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1886                 '''
1887                 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1888                         or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1889                     age_gate = False
1890                     # Try looking directly into the video webpage
1891                     ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1892                     if ytplayer_config:
1893                         args = ytplayer_config.get("args")
1894                         if args is not None:
1895                             if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1896                                 # Convert to the same format returned by compat_parse_qs
1897                                 video_info = dict((k, [v]) for k, v in args.items())
1898                                 add_dash_mpd(video_info)
1899                             # Rental video is not rented but preview is available (e.g.
1900                             # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1901                             # https://github.com/ytdl-org/youtube-dl/issues/10532)
1902                             if not video_info and args.get('ypc_vid'):
1903                                 return self.url_result(
1904                                     args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1905                             if args.get('livestream') == '1' or args.get('live_playback') == 1:
1906                                 is_live = True
1907                             if not player_response:
1908                                 player_response = extract_player_response(args.get('player_response'), video_id)
1909                         elif not player_response:
1910                             player_response = ytplayer_config
1911                     if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1912                         add_dash_mpd_pr(player_response)
1913                 else:
1914                     raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1915             else:
1916                 data = compat_urllib_parse_urlencode({
1917                     'video_id': video_id,
1918                     'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1919                     'sts': self._search_regex(
1920                         r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1921                 })
1922                 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1923                 try:
1924                     video_info_webpage = self._download_webpage(
1925                         video_info_url, video_id,
1926                         note='Refetching age-gated info webpage',
1927                         errnote='unable to download video info webpage')
1928                 except ExtractorError:
1929                     video_info_webpage = None
1930                 if video_info_webpage:
1931                     video_info = compat_parse_qs(video_info_webpage)
1932                     pl_response = video_info.get('player_response', [None])[0]
1933                     player_response = extract_player_response(pl_response, video_id)
1934                     add_dash_mpd(video_info)
1935                     view_count = extract_view_count(video_info)
1936         else:
1937             age_gate = False
1938             # Try looking directly into the video webpage
1939             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1940             args = ytplayer_config.get("args")
1941             if args is not None:
1942                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1943                     # Convert to the same format returned by compat_parse_qs
1944                     video_info = dict((k, [v]) for k, v in args.items())
1945                     add_dash_mpd(video_info)
1946                 # Rental video is not rented but preview is available (e.g.
1947                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1948                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1949                 if not video_info and args.get('ypc_vid'):
1950                     return self.url_result(
1951                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1952                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1953                     is_live = True
1954                 if not player_response:
1955                     player_response = extract_player_response(args.get('player_response'), video_id)
1956             elif not player_response:
1957                 player_response = ytplayer_config
1958             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1959                 add_dash_mpd_pr(player_response)
1960
1961         def extract_unavailable_message():
1962             messages = []
1963             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1964                 msg = self._html_search_regex(
1965                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1966                     video_webpage, 'unavailable %s' % kind, default=None)
1967                 if msg:
1968                     messages.append(msg)
1969             if messages:
1970                 return '\n'.join(messages)
1971
1972         if not video_info and not player_response:
1973             unavailable_message = extract_unavailable_message()
1974             if not unavailable_message:
1975                 unavailable_message = 'Unable to extract video data'
1976             raise ExtractorError(
1977                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1978
1979         if not isinstance(video_info, dict):
1980             video_info = {}
1981
1982         video_details = try_get(
1983             player_response, lambda x: x['videoDetails'], dict) or {}
1984
1985         microformat = try_get(
1986             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1987
1988         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1989         if not video_title:
1990             self._downloader.report_warning('Unable to extract video title')
1991             video_title = '_'
1992
1993         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1994         if video_description:
1995
1996             def replace_url(m):
1997                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1998                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1999                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
2000                     qs = compat_parse_qs(parsed_redir_url.query)
2001                     q = qs.get('q')
2002                     if q and q[0]:
2003                         return q[0]
2004                 return redir_url
2005
2006             description_original = video_description = re.sub(r'''(?x)
2007                 <a\s+
2008                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
2009                     (?:title|href)="([^"]+)"\s+
2010                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
2011                     class="[^"]*"[^>]*>
2012                 [^<]+\.{3}\s*
2013                 </a>
2014             ''', replace_url, video_description)
2015             video_description = clean_html(video_description)
2016         else:
2017             video_description = video_details.get('shortDescription')
2018             if video_description is None:
2019                 video_description = self._html_search_meta('description', video_webpage)
2020
2021         if not smuggled_data.get('force_singlefeed', False):
2022             if not self._downloader.params.get('noplaylist'):
2023                 multifeed_metadata_list = try_get(
2024                     player_response,
2025                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2026                     compat_str) or try_get(
2027                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
2028                 if multifeed_metadata_list:
2029                     entries = []
2030                     feed_ids = []
2031                     for feed in multifeed_metadata_list.split(','):
2032                         # Unquote should take place before split on comma (,) since textual
2033                         # fields may contain comma as well (see
2034                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
2035                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
2036
2037                         def feed_entry(name):
2038                             return try_get(feed_data, lambda x: x[name][0], compat_str)
2039
2040                         feed_id = feed_entry('id')
2041                         if not feed_id:
2042                             continue
2043                         feed_title = feed_entry('title')
2044                         title = video_title
2045                         if feed_title:
2046                             title += ' (%s)' % feed_title
2047                         entries.append({
2048                             '_type': 'url_transparent',
2049                             'ie_key': 'Youtube',
2050                             'url': smuggle_url(
2051                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
2052                                 {'force_singlefeed': True}),
2053                             'title': title,
2054                         })
2055                         feed_ids.append(feed_id)
2056                     self.to_screen(
2057                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2058                         % (', '.join(feed_ids), video_id))
2059                     return self.playlist_result(entries, video_id, video_title, video_description)
2060             else:
2061                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2062
2063         if view_count is None:
2064             view_count = extract_view_count(video_info)
2065         if view_count is None and video_details:
2066             view_count = int_or_none(video_details.get('viewCount'))
2067         if view_count is None and microformat:
2068             view_count = int_or_none(microformat.get('viewCount'))
2069
2070         if is_live is None:
2071             is_live = bool_or_none(video_details.get('isLive'))
2072
2073         has_live_chat_replay = False
2074         if not is_live:
2075             yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2076             try:
2077                 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2078                 has_live_chat_replay = True
2079             except (KeyError, IndexError, TypeError):
2080                 pass
2081
2082         # Check for "rental" videos
2083         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2084             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2085
2086         def _extract_filesize(media_url):
2087             return int_or_none(self._search_regex(
2088                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2089
2090         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2091         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2092
2093         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2094             self.report_rtmp_download()
2095             formats = [{
2096                 'format_id': '_rtmp',
2097                 'protocol': 'rtmp',
2098                 'url': video_info['conn'][0],
2099                 'player_url': player_url,
2100             }]
2101         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2102             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2103             if 'rtmpe%3Dyes' in encoded_url_map:
2104                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2105             formats = []
2106             formats_spec = {}
2107             fmt_list = video_info.get('fmt_list', [''])[0]
2108             if fmt_list:
2109                 for fmt in fmt_list.split(','):
2110                     spec = fmt.split('/')
2111                     if len(spec) > 1:
2112                         width_height = spec[1].split('x')
2113                         if len(width_height) == 2:
2114                             formats_spec[spec[0]] = {
2115                                 'resolution': spec[1],
2116                                 'width': int_or_none(width_height[0]),
2117                                 'height': int_or_none(width_height[1]),
2118                             }
2119             for fmt in streaming_formats:
2120                 itag = str_or_none(fmt.get('itag'))
2121                 if not itag:
2122                     continue
2123                 quality = fmt.get('quality')
2124                 quality_label = fmt.get('qualityLabel') or quality
2125                 formats_spec[itag] = {
2126                     'asr': int_or_none(fmt.get('audioSampleRate')),
2127                     'filesize': int_or_none(fmt.get('contentLength')),
2128                     'format_note': quality_label,
2129                     'fps': int_or_none(fmt.get('fps')),
2130                     'height': int_or_none(fmt.get('height')),
2131                     # bitrate for itag 43 is always 2147483647
2132                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2133                     'width': int_or_none(fmt.get('width')),
2134                 }
2135
2136             for fmt in streaming_formats:
2137                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2138                     continue
2139                 url = url_or_none(fmt.get('url'))
2140
2141                 if not url:
2142                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2143                     if not cipher:
2144                         continue
2145                     url_data = compat_parse_qs(cipher)
2146                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2147                     if not url:
2148                         continue
2149                 else:
2150                     cipher = None
2151                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2152
2153                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2154                 # Unsupported FORMAT_STREAM_TYPE_OTF
2155                 if stream_type == 3:
2156                     continue
2157
2158                 format_id = fmt.get('itag') or url_data['itag'][0]
2159                 if not format_id:
2160                     continue
2161                 format_id = compat_str(format_id)
2162
2163                 if cipher:
2164                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2165                         ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
2166                         jsplayer_url_json = self._search_regex(
2167                             ASSETS_RE,
2168                             embed_webpage if age_gate else video_webpage,
2169                             'JS player URL (1)', default=None)
2170                         if not jsplayer_url_json and not age_gate:
2171                             # We need the embed website after all
2172                             if embed_webpage is None:
2173                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2174                                 embed_webpage = self._download_webpage(
2175                                     embed_url, video_id, 'Downloading embed webpage')
2176                             jsplayer_url_json = self._search_regex(
2177                                 ASSETS_RE, embed_webpage, 'JS player URL')
2178
2179                         player_url = json.loads(jsplayer_url_json)
2180                         if player_url is None:
2181                             player_url_json = self._search_regex(
2182                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2183                                 video_webpage, 'age gate player URL')
2184                             player_url = json.loads(player_url_json)
2185
2186                     if 'sig' in url_data:
2187                         url += '&signature=' + url_data['sig'][0]
2188                     elif 's' in url_data:
2189                         encrypted_sig = url_data['s'][0]
2190
2191                         if self._downloader.params.get('verbose'):
2192                             if player_url is None:
2193                                 player_desc = 'unknown'
2194                             else:
2195                                 player_type, player_version = self._extract_player_info(player_url)
2196                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2197                             parts_sizes = self._signature_cache_id(encrypted_sig)
2198                             self.to_screen('{%s} signature length %s, %s' %
2199                                            (format_id, parts_sizes, player_desc))
2200
2201                         signature = self._decrypt_signature(
2202                             encrypted_sig, video_id, player_url, age_gate)
2203                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2204                         url += '&%s=%s' % (sp, signature)
2205                 if 'ratebypass' not in url:
2206                     url += '&ratebypass=yes'
2207
2208                 dct = {
2209                     'format_id': format_id,
2210                     'url': url,
2211                     'player_url': player_url,
2212                 }
2213                 if format_id in self._formats:
2214                     dct.update(self._formats[format_id])
2215                 if format_id in formats_spec:
2216                     dct.update(formats_spec[format_id])
2217
2218                 # Some itags are not included in DASH manifest thus corresponding formats will
2219                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2220                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2221                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2222                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2223
2224                 if width is None:
2225                     width = int_or_none(fmt.get('width'))
2226                 if height is None:
2227                     height = int_or_none(fmt.get('height'))
2228
2229                 filesize = int_or_none(url_data.get(
2230                     'clen', [None])[0]) or _extract_filesize(url)
2231
2232                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2233                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2234
2235                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2236                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2237                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2238
2239                 more_fields = {
2240                     'filesize': filesize,
2241                     'tbr': tbr,
2242                     'width': width,
2243                     'height': height,
2244                     'fps': fps,
2245                     'format_note': quality_label or quality,
2246                 }
2247                 for key, value in more_fields.items():
2248                     if value:
2249                         dct[key] = value
2250                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2251                 if type_:
2252                     type_split = type_.split(';')
2253                     kind_ext = type_split[0].split('/')
2254                     if len(kind_ext) == 2:
2255                         kind, _ = kind_ext
2256                         dct['ext'] = mimetype2ext(type_split[0])
2257                         if kind in ('audio', 'video'):
2258                             codecs = None
2259                             for mobj in re.finditer(
2260                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2261                                 if mobj.group('key') == 'codecs':
2262                                     codecs = mobj.group('val')
2263                                     break
2264                             if codecs:
2265                                 dct.update(parse_codecs(codecs))
2266                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2267                     dct['downloader_options'] = {
2268                         # Youtube throttles chunks >~10M
2269                         'http_chunk_size': 10485760,
2270                     }
2271                 formats.append(dct)
2272         else:
2273             manifest_url = (
2274                 url_or_none(try_get(
2275                     player_response,
2276                     lambda x: x['streamingData']['hlsManifestUrl'],
2277                     compat_str))
2278                 or url_or_none(try_get(
2279                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2280             if manifest_url:
2281                 formats = []
2282                 m3u8_formats = self._extract_m3u8_formats(
2283                     manifest_url, video_id, 'mp4', fatal=False)
2284                 for a_format in m3u8_formats:
2285                     itag = self._search_regex(
2286                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2287                     if itag:
2288                         a_format['format_id'] = itag
2289                         if itag in self._formats:
2290                             dct = self._formats[itag].copy()
2291                             dct.update(a_format)
2292                             a_format = dct
2293                     a_format['player_url'] = player_url
2294                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2295                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2296                     if self._downloader.params.get('youtube_include_hls_manifest', True):
2297                         formats.append(a_format)
2298             else:
2299                 error_message = extract_unavailable_message()
2300                 if not error_message:
2301                     error_message = clean_html(try_get(
2302                         player_response, lambda x: x['playabilityStatus']['reason'],
2303                         compat_str))
2304                 if not error_message:
2305                     error_message = clean_html(
2306                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2307                 if error_message:
2308                     raise ExtractorError(error_message, expected=True)
2309                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2310
2311         # uploader
2312         video_uploader = try_get(
2313             video_info, lambda x: x['author'][0],
2314             compat_str) or str_or_none(video_details.get('author'))
2315         if video_uploader:
2316             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2317         else:
2318             self._downloader.report_warning('unable to extract uploader name')
2319
2320         # uploader_id
2321         video_uploader_id = None
2322         video_uploader_url = None
2323         mobj = re.search(
2324             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2325             video_webpage)
2326         if mobj is not None:
2327             video_uploader_id = mobj.group('uploader_id')
2328             video_uploader_url = mobj.group('uploader_url')
2329         else:
2330             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2331             if owner_profile_url:
2332                 video_uploader_id = self._search_regex(
2333                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2334                     default=None)
2335                 video_uploader_url = owner_profile_url
2336
2337         channel_id = (
2338             str_or_none(video_details.get('channelId'))
2339             or self._html_search_meta(
2340                 'channelId', video_webpage, 'channel id', default=None)
2341             or self._search_regex(
2342                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2343                 video_webpage, 'channel id', default=None, group='id'))
2344         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2345
2346         thumbnails = []
2347         thumbnails_list = try_get(
2348             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2349         for t in thumbnails_list:
2350             if not isinstance(t, dict):
2351                 continue
2352             thumbnail_url = url_or_none(t.get('url'))
2353             if not thumbnail_url:
2354                 continue
2355             thumbnails.append({
2356                 'url': thumbnail_url,
2357                 'width': int_or_none(t.get('width')),
2358                 'height': int_or_none(t.get('height')),
2359             })
2360
2361         if not thumbnails:
2362             video_thumbnail = None
2363             # We try first to get a high quality image:
2364             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2365                                 video_webpage, re.DOTALL)
2366             if m_thumb is not None:
2367                 video_thumbnail = m_thumb.group(1)
2368             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2369             if thumbnail_url:
2370                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2371             if video_thumbnail:
2372                 thumbnails.append({'url': video_thumbnail})
2373
2374         # upload date
2375         upload_date = self._html_search_meta(
2376             'datePublished', video_webpage, 'upload date', default=None)
2377         if not upload_date:
2378             upload_date = self._search_regex(
2379                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2380                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2381                 video_webpage, 'upload date', default=None)
2382         if not upload_date:
2383             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2384         upload_date = unified_strdate(upload_date)
2385
2386         video_license = self._html_search_regex(
2387             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2388             video_webpage, 'license', default=None)
2389
2390         m_music = re.search(
2391             r'''(?x)
2392                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2393                 <ul[^>]*>\s*
2394                 <li>(?P<title>.+?)
2395                 by (?P<creator>.+?)
2396                 (?:
2397                     \(.+?\)|
2398                     <a[^>]*
2399                         (?:
2400                             \bhref=["\']/red[^>]*>|             # drop possible
2401                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2402                         )
2403                     .*?
2404                 )?</li
2405             ''',
2406             video_webpage)
2407         if m_music:
2408             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2409             video_creator = clean_html(m_music.group('creator'))
2410         else:
2411             video_alt_title = video_creator = None
2412
2413         def extract_meta(field):
2414             return self._html_search_regex(
2415                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2416                 video_webpage, field, default=None)
2417
2418         track = extract_meta('Song')
2419         artist = extract_meta('Artist')
2420         album = extract_meta('Album')
2421
2422         # Youtube Music Auto-generated description
2423         release_date = release_year = None
2424         if video_description:
2425             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2426             if mobj:
2427                 if not track:
2428                     track = mobj.group('track').strip()
2429                 if not artist:
2430                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2431                 if not album:
2432                     album = mobj.group('album'.strip())
2433                 release_year = mobj.group('release_year')
2434                 release_date = mobj.group('release_date')
2435                 if release_date:
2436                     release_date = release_date.replace('-', '')
2437                     if not release_year:
2438                         release_year = int(release_date[:4])
2439                 if release_year:
2440                     release_year = int(release_year)
2441
2442         yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2443         if yt_initial:
2444             music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2445             if len(music_metadata):
2446                 album = music_metadata[0].get('album')
2447                 artist = music_metadata[0].get('artist')
2448                 track = music_metadata[0].get('track')
2449
2450         m_episode = re.search(
2451             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2452             video_webpage)
2453         if m_episode:
2454             series = unescapeHTML(m_episode.group('series'))
2455             season_number = int(m_episode.group('season'))
2456             episode_number = int(m_episode.group('episode'))
2457         else:
2458             series = season_number = episode_number = None
2459
2460         m_cat_container = self._search_regex(
2461             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2462             video_webpage, 'categories', default=None)
2463         category = None
2464         if m_cat_container:
2465             category = self._html_search_regex(
2466                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2467                 default=None)
2468         if not category:
2469             category = try_get(
2470                 microformat, lambda x: x['category'], compat_str)
2471         video_categories = None if category is None else [category]
2472
2473         video_tags = [
2474             unescapeHTML(m.group('content'))
2475             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2476         if not video_tags:
2477             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2478
2479         def _extract_count(count_name):
2480             return str_to_int(self._search_regex(
2481                 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
2482                 % re.escape(count_name),
2483                 video_webpage, count_name, default=None))
2484
2485         like_count = _extract_count('like')
2486         dislike_count = _extract_count('dislike')
2487
2488         if view_count is None:
2489             view_count = str_to_int(self._search_regex(
2490                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2491                 'view count', default=None))
2492
2493         average_rating = (
2494             float_or_none(video_details.get('averageRating'))
2495             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2496
2497         # subtitles
2498         video_subtitles = self.extract_subtitles(
2499             video_id, video_webpage, has_live_chat_replay)
2500         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2501
2502         video_duration = try_get(
2503             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2504         if not video_duration:
2505             video_duration = int_or_none(video_details.get('lengthSeconds'))
2506         if not video_duration:
2507             video_duration = parse_duration(self._html_search_meta(
2508                 'duration', video_webpage, 'video duration'))
2509
2510         # Get Subscriber Count of channel
2511         subscriber_count = parse_count(self._search_regex(
2512             r'"text":"([\d\.]+\w?) subscribers"',
2513             video_webpage,
2514             'subscriber count',
2515             default=None
2516         ))
2517
2518         # annotations
2519         video_annotations = None
2520         if self._downloader.params.get('writeannotations', False):
2521             xsrf_token = self._search_regex(
2522                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2523                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2524             invideo_url = try_get(
2525                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2526             if xsrf_token and invideo_url:
2527                 xsrf_field_name = self._search_regex(
2528                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2529                     video_webpage, 'xsrf field name',
2530                     group='xsrf_field_name', default='session_token')
2531                 video_annotations = self._download_webpage(
2532                     self._proto_relative_url(invideo_url),
2533                     video_id, note='Downloading annotations',
2534                     errnote='Unable to download video annotations', fatal=False,
2535                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2536
2537         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2538
2539         # Look for the DASH manifest
2540         if self._downloader.params.get('youtube_include_dash_manifest', True):
2541             dash_mpd_fatal = True
2542             for mpd_url in dash_mpds:
2543                 dash_formats = {}
2544                 try:
2545                     def decrypt_sig(mobj):
2546                         s = mobj.group(1)
2547                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2548                         return '/signature/%s' % dec_s
2549
2550                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2551
2552                     for df in self._extract_mpd_formats(
2553                             mpd_url, video_id, fatal=dash_mpd_fatal,
2554                             formats_dict=self._formats):
2555                         if not df.get('filesize'):
2556                             df['filesize'] = _extract_filesize(df['url'])
2557                         # Do not overwrite DASH format found in some previous DASH manifest
2558                         if df['format_id'] not in dash_formats:
2559                             dash_formats[df['format_id']] = df
2560                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2561                         # allow them to fail without bug report message if we already have
2562                         # some DASH manifest succeeded. This is temporary workaround to reduce
2563                         # burst of bug reports until we figure out the reason and whether it
2564                         # can be fixed at all.
2565                         dash_mpd_fatal = False
2566                 except (ExtractorError, KeyError) as e:
2567                     self.report_warning(
2568                         'Skipping DASH manifest: %r' % e, video_id)
2569                 if dash_formats:
2570                     # Remove the formats we found through non-DASH, they
2571                     # contain less info and it can be wrong, because we use
2572                     # fixed values (for example the resolution). See
2573                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2574                     # example.
2575                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2576                     formats.extend(dash_formats.values())
2577
2578         # Check for malformed aspect ratio
2579         stretched_m = re.search(
2580             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2581             video_webpage)
2582         if stretched_m:
2583             w = float(stretched_m.group('w'))
2584             h = float(stretched_m.group('h'))
2585             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2586             # We will only process correct ratios.
2587             if w > 0 and h > 0:
2588                 ratio = w / h
2589                 for f in formats:
2590                     if f.get('vcodec') != 'none':
2591                         f['stretched_ratio'] = ratio
2592
2593         if not formats:
2594             if 'reason' in video_info:
2595                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2596                     regions_allowed = self._html_search_meta(
2597                         'regionsAllowed', video_webpage, default=None)
2598                     countries = regions_allowed.split(',') if regions_allowed else None
2599                     self.raise_geo_restricted(
2600                         msg=video_info['reason'][0], countries=countries)
2601                 reason = video_info['reason'][0]
2602                 if 'Invalid parameters' in reason:
2603                     unavailable_message = extract_unavailable_message()
2604                     if unavailable_message:
2605                         reason = unavailable_message
2606                 raise ExtractorError(
2607                     'YouTube said: %s' % reason,
2608                     expected=True, video_id=video_id)
2609             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2610                 raise ExtractorError('This video is DRM protected.', expected=True)
2611
2612         self._sort_formats(formats)
2613
2614         self.mark_watched(video_id, video_info, player_response)
2615
2616         return {
2617             'id': video_id,
2618             'uploader': video_uploader,
2619             'uploader_id': video_uploader_id,
2620             'uploader_url': video_uploader_url,
2621             'channel_id': channel_id,
2622             'channel_url': channel_url,
2623             'upload_date': upload_date,
2624             'license': video_license,
2625             'creator': video_creator or artist,
2626             'title': video_title,
2627             'alt_title': video_alt_title or track,
2628             'thumbnails': thumbnails,
2629             'description': video_description,
2630             'categories': video_categories,
2631             'tags': video_tags,
2632             'subtitles': video_subtitles,
2633             'automatic_captions': automatic_captions,
2634             'duration': video_duration,
2635             'age_limit': 18 if age_gate else 0,
2636             'annotations': video_annotations,
2637             'chapters': chapters,
2638             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2639             'view_count': view_count,
2640             'like_count': like_count,
2641             'dislike_count': dislike_count,
2642             'average_rating': average_rating,
2643             'formats': formats,
2644             'is_live': is_live,
2645             'start_time': start_time,
2646             'end_time': end_time,
2647             'series': series,
2648             'season_number': season_number,
2649             'episode_number': episode_number,
2650             'track': track,
2651             'artist': artist,
2652             'album': album,
2653             'release_date': release_date,
2654             'release_year': release_year,
2655             'subscriber_count': subscriber_count,
2656         }
2657
2658
2659 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2660     IE_DESC = 'YouTube.com playlists'
2661     _VALID_URL = r"""(?x)(?:
2662                         (?:https?://)?
2663                         (?:\w+\.)?
2664                         (?:
2665                             (?:
2666                                 youtube(?:kids)?\.com|
2667                                 invidio\.us
2668                             )
2669                             /
2670                             (?:
2671                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2672                                \? (?:.*?[&;])*? (?:p|a|list)=
2673                             |  p/
2674                             )|
2675                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2676                         )
2677                         (
2678                             (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2679                             # Top tracks, they can also include dots
2680                             |(?:MC)[\w\.]*
2681                         )
2682                         .*
2683                      |
2684                         (%(playlist_id)s)
2685                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2686     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2687     _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2688     _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2689     IE_NAME = 'youtube:playlist'
2690     _YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
2691     _YTM_CHANNEL_INFO = {
2692         'uploader': 'Youtube Music',
2693         'uploader_id': 'music',  # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
2694         'uploader_url': 'https://www.youtube.com/music'
2695     }
2696     _TESTS = [{
2697         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2698         'info_dict': {
2699             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2700             'uploader': 'Sergey M.',
2701             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2702             'title': 'youtube-dl public playlist',
2703         },
2704         'playlist_count': 1,
2705     }, {
2706         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2707         'info_dict': {
2708             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2709             'uploader': 'Sergey M.',
2710             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2711             'title': 'youtube-dl empty playlist',
2712         },
2713         'playlist_count': 0,
2714     }, {
2715         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2716         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2717         'info_dict': {
2718             'title': '29C3: Not my department',
2719             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2720             'uploader': 'Christiaan008',
2721             'uploader_id': 'ChRiStIaAn008',
2722         },
2723         'playlist_count': 96,
2724     }, {
2725         'note': 'issue #673',
2726         'url': 'PLBB231211A4F62143',
2727         'info_dict': {
2728             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2729             'id': 'PLBB231211A4F62143',
2730             'uploader': 'Wickydoo',
2731             'uploader_id': 'Wickydoo',
2732         },
2733         'playlist_mincount': 26,
2734     }, {
2735         'note': 'Large playlist',
2736         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2737         'info_dict': {
2738             'title': 'Uploads from Cauchemar',
2739             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2740             'uploader': 'Cauchemar',
2741             'uploader_id': 'Cauchemar89',
2742         },
2743         'playlist_mincount': 799,
2744     }, {
2745         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2746         'info_dict': {
2747             'title': 'YDL_safe_search',
2748             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2749         },
2750         'playlist_count': 2,
2751         'skip': 'This playlist is private',
2752     }, {
2753         'note': 'embedded',
2754         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2755         'playlist_count': 4,
2756         'info_dict': {
2757             'title': 'JODA15',
2758             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2759             'uploader': 'milan',
2760             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2761         }
2762     }, {
2763         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2764         'playlist_mincount': 485,
2765         'info_dict': {
2766             'title': '2018 Chinese New Singles (11/6 updated)',
2767             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2768             'uploader': 'LBK',
2769             'uploader_id': 'sdragonfang',
2770         }
2771     }, {
2772         'note': 'Embedded SWF player',
2773         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2774         'playlist_count': 4,
2775         'info_dict': {
2776             'title': 'JODA7',
2777             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2778         },
2779         'skip': 'This playlist does not exist',
2780     }, {
2781         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2782         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2783         'info_dict': {
2784             'title': 'Uploads from Interstellar Movie',
2785             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2786             'uploader': 'Interstellar Movie',
2787             'uploader_id': 'InterstellarMovie1',
2788         },
2789         'playlist_mincount': 21,
2790     }, {
2791         # Playlist URL that does not actually serve a playlist
2792         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2793         'info_dict': {
2794             'id': 'FqZTN594JQw',
2795             'ext': 'webm',
2796             'title': "Smiley's People 01 detective, Adventure Series, Action",
2797             'uploader': 'STREEM',
2798             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2799             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2800             'upload_date': '20150526',
2801             'license': 'Standard YouTube License',
2802             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2803             'categories': ['People & Blogs'],
2804             'tags': list,
2805             'view_count': int,
2806             'like_count': int,
2807             'dislike_count': int,
2808         },
2809         'params': {
2810             'skip_download': True,
2811         },
2812         'skip': 'This video is not available.',
2813         'add_ie': [YoutubeIE.ie_key()],
2814     }, {
2815         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2816         'info_dict': {
2817             'id': 'yeWKywCrFtk',
2818             'ext': 'mp4',
2819             'title': 'Small Scale Baler and Braiding Rugs',
2820             'uploader': 'Backus-Page House Museum',
2821             'uploader_id': 'backuspagemuseum',
2822             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2823             'upload_date': '20161008',
2824             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2825             'categories': ['Nonprofits & Activism'],
2826             'tags': list,
2827             'like_count': int,
2828             'dislike_count': int,
2829         },
2830         'params': {
2831             'noplaylist': True,
2832             'skip_download': True,
2833         },
2834     }, {
2835         # https://github.com/ytdl-org/youtube-dl/issues/21844
2836         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2837         'info_dict': {
2838             'title': 'Data Analysis with Dr Mike Pound',
2839             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2840             'uploader_id': 'Computerphile',
2841             'uploader': 'Computerphile',
2842         },
2843         'playlist_mincount': 11,
2844     }, {
2845         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2846         'only_matching': True,
2847     }, {
2848         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2849         'only_matching': True,
2850     }, {
2851         # music album playlist
2852         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2853         'only_matching': True,
2854     }, {
2855         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2856         'only_matching': True,
2857     }, {
2858         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2859         'only_matching': True,
2860     }]
2861
    def _real_initialize(self):
        """Initialization hook: delegates to `self._login()`."""
        self._login()
2864
2865     def extract_videos_from_page(self, page):
2866         ids_in_page = []
2867         titles_in_page = []
2868
2869         for item in re.findall(
2870                 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2871             attrs = extract_attributes(item)
2872             video_id = attrs['data-video-id']
2873             video_title = unescapeHTML(attrs.get('data-title'))
2874             if video_title:
2875                 video_title = video_title.strip()
2876             ids_in_page.append(video_id)
2877             titles_in_page.append(video_title)
2878
2879         # Fallback with old _VIDEO_RE
2880         self.extract_videos_from_page_impl(
2881             self._VIDEO_RE, page, ids_in_page, titles_in_page)
2882
2883         # Relaxed fallbacks
2884         self.extract_videos_from_page_impl(
2885             r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2886             ids_in_page, titles_in_page)
2887         self.extract_videos_from_page_impl(
2888             r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2889             ids_in_page, titles_in_page)
2890
2891         return zip(ids_in_page, titles_in_page)
2892
2893     def _extract_mix_ids_from_yt_initial(self, yt_initial):
2894         ids = []
2895         playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
2896         if playlist_contents:
2897             for item in playlist_contents:
2898                 videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
2899                 if videoId:
2900                     ids.append(videoId)
2901         return ids
2902
2903     def _extract_mix(self, playlist_id):
2904         # The mixes are generated from a single video
2905         # the id of the playlist is just 'RD' + video_id
2906         ids = []
2907         yt_initial = None
2908         last_id = playlist_id[-11:]
2909         for n in itertools.count(1):
2910             url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2911             webpage = self._download_webpage(
2912                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2913             new_ids = orderedSet(re.findall(
2914                 r'''(?xs)data-video-username=".*?".*?
2915                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2916                 webpage))
2917
2918             # if no ids in html of page, try using embedded json
2919             if (len(new_ids) == 0):
2920                 yt_initial = self._get_yt_initial_data(playlist_id, webpage)
2921                 if yt_initial:
2922                     new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
2923
2924             # Fetch new pages until all the videos are repeated, it seems that
2925             # there are always 51 unique videos.
2926             new_ids = [_id for _id in new_ids if _id not in ids]
2927             if not new_ids:
2928                 break
2929             ids.extend(new_ids)
2930             last_id = ids[-1]
2931
2932         url_results = self._ids_to_results(ids)
2933
2934         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2935         title_span = (
2936             search_title('playlist-title')
2937             or search_title('title long-title')
2938             or search_title('title'))
2939         title = clean_html(title_span)
2940
2941         if not title:
2942             title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)
2943
2944         return self.playlist_result(url_results, playlist_id, title)
2945
2946     def _extract_playlist(self, playlist_id):
2947         url = self._TEMPLATE_URL % playlist_id
2948         page = self._download_webpage(url, playlist_id)
2949
2950         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2951         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2952             match = match.strip()
2953             # Check if the playlist exists or is private
2954             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2955             if mobj:
2956                 reason = mobj.group('reason')
2957                 message = 'This playlist %s' % reason
2958                 if 'private' in reason:
2959                     message += ', use --username or --netrc to access it'
2960                 message += '.'
2961                 raise ExtractorError(message, expected=True)
2962             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2963                 raise ExtractorError(
2964                     'Invalid parameters. Maybe URL is incorrect.',
2965                     expected=True)
2966             elif re.match(r'[^<]*Choose your language[^<]*', match):
2967                 continue
2968             else:
2969                 self.report_warning('Youtube gives an alert message: ' + match)
2970
2971         playlist_title = self._html_search_regex(
2972             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2973             page, 'title', default=None)
2974
2975         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2976         uploader = self._html_search_regex(
2977             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2978             page, 'uploader', default=None)
2979         mobj = re.search(
2980             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2981             page)
2982         if mobj:
2983             uploader_id = mobj.group('uploader_id')
2984             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2985         else:
2986             uploader_id = uploader_url = None
2987
2988         has_videos = True
2989
2990         if not playlist_title:
2991             try:
2992                 # Some playlist URLs don't actually serve a playlist (e.g.
2993                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2994                 next(self._entries(page, playlist_id))
2995             except StopIteration:
2996                 has_videos = False
2997
2998         playlist = self.playlist_result(
2999             self._entries(page, playlist_id), playlist_id, playlist_title)
3000         playlist.update({
3001             'uploader': uploader,
3002             'uploader_id': uploader_id,
3003             'uploader_url': uploader_url,
3004         })
3005         if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
3006             playlist.update(self._YTM_CHANNEL_INFO)
3007
3008         return has_videos, playlist
3009
3010     def _check_download_just_video(self, url, playlist_id):
3011         # Check if it's a video-specific URL
3012         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
3013         video_id = query_dict.get('v', [None])[0] or self._search_regex(
3014             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
3015             'video id', default=None)
3016         if video_id:
3017             if self._downloader.params.get('noplaylist'):
3018                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3019                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
3020             else:
3021                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
3022                 return video_id, None
3023         return None, None
3024
3025     def _real_extract(self, url):
3026         # Extract playlist id
3027         mobj = re.match(self._VALID_URL, url)
3028         if mobj is None:
3029             raise ExtractorError('Invalid URL: %s' % url)
3030         playlist_id = mobj.group(1) or mobj.group(2)
3031
3032         video_id, video = self._check_download_just_video(url, playlist_id)
3033         if video:
3034             return video
3035
3036         if playlist_id.startswith(('RD', 'UL', 'PU')):
3037             if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
3038                 # Mixes require a custom extraction process,
3039                 # Youtube Music playlists act like normal playlists (with randomized order)
3040                 return self._extract_mix(playlist_id)
3041
3042         has_videos, playlist = self._extract_playlist(playlist_id)
3043         if has_videos or not video_id:
3044             return playlist
3045
3046         # Some playlist URLs don't actually serve a playlist (see
3047         # https://github.com/ytdl-org/youtube-dl/issues/10537).
3048         # Fallback to plain video extraction if there is a video id
3049         # along with playlist id.
3050         return self.url_result(video_id, 'Youtube', video_id=video_id)
3051
3052
3053 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
3054     IE_DESC = 'YouTube.com channels'
3055     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
3056     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
3057     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
3058     IE_NAME = 'youtube:channel'
3059     _TESTS = [{
3060         'note': 'paginated channel',
3061         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
3062         'playlist_mincount': 91,
3063         'info_dict': {
3064             'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
3065             'title': 'Uploads from lex will',
3066             'uploader': 'lex will',
3067             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3068         }
3069     }, {
3070         'note': 'Age restricted channel',
3071         # from https://www.youtube.com/user/DeusExOfficial
3072         'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
3073         'playlist_mincount': 64,
3074         'info_dict': {
3075             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
3076             'title': 'Uploads from Deus Ex',
3077             'uploader': 'Deus Ex',
3078             'uploader_id': 'DeusExOfficial',
3079         },
3080     }, {
3081         'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
3082         'only_matching': True,
3083     }, {
3084         'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
3085         'only_matching': True,
3086     }]
3087
3088     @classmethod
3089     def suitable(cls, url):
3090         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
3091                 else super(YoutubeChannelIE, cls).suitable(url))
3092
    def _build_template_url(self, url, channel_id):
        """Return `_TEMPLATE_URL` filled in with `channel_id`.

        The `url` argument is not used by this implementation.
        """
        return self._TEMPLATE_URL % channel_id
3095
3096     def _real_extract(self, url):
3097         channel_id = self._match_id(url)
3098
3099         url = self._build_template_url(url, channel_id)
3100
3101         # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
3102         # Workaround by extracting as a playlist if managed to obtain channel playlist URL
3103         # otherwise fallback on channel by page extraction
3104         channel_page = self._download_webpage(
3105             url + '?view=57', channel_id,
3106             'Downloading channel page', fatal=False)
3107         if channel_page is False:
3108             channel_playlist_id = False
3109         else:
3110             channel_playlist_id = self._html_search_meta(
3111                 'channelId', channel_page, 'channel id', default=None)
3112             if not channel_playlist_id:
3113                 channel_url = self._html_search_meta(
3114                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
3115                     channel_page, 'channel url', default=None)
3116                 if channel_url:
3117                     channel_playlist_id = self._search_regex(
3118                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
3119                         channel_url, 'channel id', default=None)
3120         if channel_playlist_id and channel_playlist_id.startswith('UC'):
3121             playlist_id = 'UU' + channel_playlist_id[2:]
3122             return self.url_result(
3123                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
3124
3125         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
3126         autogenerated = re.search(r'''(?x)
3127                 class="[^"]*?(?:
3128                     channel-header-autogenerated-label|
3129                     yt-channel-title-autogenerated
3130                 )[^"]*"''', channel_page) is not None
3131
3132         if autogenerated:
3133             # The videos are contained in a single page
3134             # the ajax pages can't be used, they are empty
3135             entries = [
3136                 self.url_result(
3137                     video_id, 'Youtube', video_id=video_id,
3138                     video_title=video_title)
3139                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
3140             return self.playlist_result(entries, channel_id)
3141
3142         try:
3143             next(self._entries(channel_page, channel_id))
3144         except StopIteration:
3145             alert_message = self._html_search_regex(
3146                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
3147                 channel_page, 'alert', default=None, group='alert')
3148             if alert_message:
3149                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
3150
3151         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3152
3153
class YoutubeUserIE(YoutubeChannelIE):
    """Channel-style extractor for ``/user/<name>``, ``/c/<name>`` and bare
    ``/<name>`` URLs as well as the ``ytuser:<name>`` shorthand."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    # Filled with (path kind, user id) by _build_template_url
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only when no other Youtube extractor wants it.

        The pattern of this class is too permissive and would also match
        URLs that belong to other extractors, so every other module-level
        ``Youtube...IE`` class is asked first.
        """
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the videos page URL from the pieces matched in *url*.

        The path kind is the matched ``user``/``c`` prefix and defaults to
        ``user`` when the URL carried none.
        """
        match = re.match(self._VALID_URL, url)
        path_kind = match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, match.group('id'))
3211
3212
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a ``.../live`` URL to the video it shows, or to its base URL."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a result for the video on the live page.

        When the page cannot be downloaded, is not of a ``video...`` type
        or exposes no 11-character video id, the result points at the base
        URL (the part of *url* before ``/live``) instead.
        """
        match = re.match(self._VALID_URL, url)
        channel_id, base_url = match.group('id'), match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        looks_like_video = (
            page_type.startswith('video') and video_id and
            re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3263
3264
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user, channel or ``/c/`` URL.

    Only the URL pattern, names and test cases are declared here; no method
    is overridden, so extraction is whatever the base class provides.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    # The path segment right before "/playlists" is captured as "id"
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3297
3298
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    """Search extractor that queries the ``youtubei/v1/search`` JSON endpoint."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # When set, sent along as the "params" field of the request body
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* search results for *query*.

        Result pages are requested one after another, each request carrying
        the continuation token found in the previous response.  Every
        yielded item is a ``url_transparent`` dict for YoutubeIE, pre-filled
        with the title, description, duration, view count and uploader taken
        from the search response (each may be None when absent).

        Iteration ends quietly when a page cannot be downloaded, when a
        response lacks the expected result lists, once *n* results have been
        produced, or when no continuation token is found.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page nests the sections under "contents"; later
            # (continuation) pages deliver them through an append action.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Counts may be written with comma thousands separators
                # (e.g. "1,234,567 views").  Capture the whole comma-grouped
                # number and drop the commas; matching plain digits only
                # would stop at the first comma and report 1 instead.
                view_count_digits = self._search_regex(
                    r'^(\d[\d,]*)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None)
                view_count = int_or_none(
                    view_count_digits.replace(',', '')) if view_count_digits else None
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            # Ask for the next page on the following iteration
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3387
3388
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE registered under the ``ytsearchdate`` key."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # NOTE(review): presumably the token that selects newest-first
    # ordering (see IE_DESC) -- confirm against the search API.
    _SEARCH_PARAMS = 'CAI%3D'
3394
3395
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``youtube.com/results?...`` search result URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _process_json_dict(self, obj, videos, c):
        """Sort one JSON dict into *videos* or the continuation holder *c*.

        A dict with a "videoId" key is appended to *videos*; otherwise a
        dict with "nextContinuationData" has that value stored under
        ``c["continuation"]``.  Any other dict is left alone.
        """
        if "videoId" in obj:
            videos.append(obj)
        elif "nextContinuationData" in obj:
            c["continuation"] = obj["nextContinuationData"]

    def _real_extract(self, url):
        """Return the results of the search URL as a playlist titled by its query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._entries(results_page, query, max_pages=5)
        return self.playlist_result(entries, playlist_title=query)
3425
3426
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``youtube.com/show/<id>`` pages."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Run the inherited extraction on the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3444
3445
class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name built from the feed name: ``youtube:<_FEED_NAME>``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _process_entries(self, entries, seen):
        """Yield url results for the videos in *entries* not yet in *seen*.

        *entries* is an iterable of objects expected to carry a 'videoId';
        those without one are skipped.  *seen* is a list of previously
        accepted objects and is extended in place with the newly accepted
        ones.  A video id is accepted only once, even when it occurs
        several times within the same batch of *entries*.
        """
        # Collect the known ids once so every membership test is a set
        # lookup instead of a rescan of the whole `seen` list.
        known_ids = set()
        for old in seen:
            old_id = try_get(old, lambda x: x['videoId'])
            if old_id:
                known_ids.add(old_id)

        fresh = []
        for v in entries:
            v_id = try_get(v, lambda x: x['videoId'])
            if not v_id or v_id in known_ids:
                continue
            # Record the id right away so a repeat later in this very batch
            # is recognised as a duplicate as well.
            known_ids.add(v_id)
            fresh.append((v_id, v))

        if not fresh:
            return

        seen.extend(v for _, v in fresh)
        for v_id, video in fresh:
            yield self.url_result(v_id, YoutubeIE.ie_key(), video_title=self._extract_title(video))

    def _real_extract(self, url):
        """Download the feed page and return its entries as a playlist.

        The page is chosen by _FEED_NAME alone; *url* itself is not used.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
                                    playlist_title=self._PLAYLIST_TITLE)
3489
3490
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list (playlist id ``WL``)."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single-video result when the check yields one, else the WL playlist."""
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, watch_later = self._extract_playlist('WL')
            return watch_later
        return single_video
3510
3511
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page to the playlist it links to."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the ``list=`` id on the favourites page and hand it to 'YoutubePlaylist'."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
3522
3523
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``recommended`` feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3529
3530
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``subscriptions`` feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    # NOTE(review): the description advertises "ytsubs" whereas the pattern
    # below only accepts the colon-prefixed ":ytsubs" -- confirm which one
    # is intended.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3536
3537
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``history`` feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3543
3544
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end after at most one known
    non-id parameter and answer them with a hint instead of extracting."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising to quote the URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
3592
3593
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)