youtube_dlc/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     bool_or_none,
  30     clean_html,
  31     error_to_compat_str,
  32     ExtractorError,
  33     float_or_none,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     parse_codecs,
  38     parse_count,
  39     parse_duration,
  40     remove_quotes,
  41     remove_start,
  42     smuggle_url,
  43     str_or_none,
  44     str_to_int,
  45     try_get,
  46     unescapeHTML,
  47     unified_strdate,
  48     unsmuggle_url,
  49     update_url_query,
  50     uppercase_escape,
  51     url_or_none,
  52     urlencode_postdata,
  53     urljoin,
  54 )
  55
  56
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Page fetched first by _login(); its hidden form inputs seed every
    # subsequent login request.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # NOTE(review): not referenced in the visible part of this file - confirm
    # whether it is still used elsewhere.
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints posted to by _login(): account lookup, password challenge and
    # TFA code submission ({0} is filled with the "TL" value read from the
    # challenge response).
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of URL path names.
    # NOTE(review): presumably used by URL patterns elsewhere to keep these
    # paths from being mistaken for user/channel names - no use site is
    # visible here, confirm before relying on it.
    _RESERVED_NAMES = (
        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|'
        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')

    # NOTE(review): presumably the machine name looked up in .netrc by
    # _get_login_info() - that helper is defined outside this file.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment for playlist ids; interpolated into YoutubeIE._VALID_URL
    # so that combined list/video URLs are not claimed by the video extractor.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # NOTE(review): not referenced in the visible part of this file; by name
    # these are HTTP headers identifying the web client - confirm at use sites.
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }
  81
  82     def _set_language(self):
  83         self._set_cookie(
  84             '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
  85             # YouTube sets the expire time to about two months
  86             expire_time=time.time() + 2 * 30 * 24 * 3600)
  87
  88     def _ids_to_results(self, ids):
  89         return [
  90             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  91             for vid_id in ids]
  92
  93     def _login(self):
  94         """
  95         Attempt to log in to YouTube.
  96         True is returned if successful or skipped.
  97         False is returned if login failed.
  98
  99         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 100         """
 101         username, password = self._get_login_info()
 102         # No authentication to be performed
 103         if username is None:
 104             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
 105                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 106             if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 107                 self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 108             return True
 109
 110         login_page = self._download_webpage(
 111             self._LOGIN_URL, None,
 112             note='Downloading login page',
 113             errnote='unable to fetch login page', fatal=False)
 114         if login_page is False:
 115             return
 116
 117         login_form = self._hidden_inputs(login_page)
 118
 119         def req(url, f_req, note, errnote):
 120             data = login_form.copy()
 121             data.update({
 122                 'pstMsg': 1,
 123                 'checkConnection': 'youtube',
 124                 'checkedDomains': 'youtube',
 125                 'hl': 'en',
 126                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 127                 'f.req': json.dumps(f_req),
 128                 'flowName': 'GlifWebSignIn',
 129                 'flowEntry': 'ServiceLogin',
 130                 # TODO: reverse actual botguard identifier generation algo
 131                 'bgRequest': '["identifier",""]',
 132             })
 133             return self._download_json(
 134                 url, None, note=note, errnote=errnote,
 135                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 136                 fatal=False,
 137                 data=urlencode_postdata(data), headers={
 138                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 139                     'Google-Accounts-XSRF': 1,
 140                 })
 141
 142         def warn(message):
 143             self._downloader.report_warning(message)
 144
 145         lookup_req = [
 146             username,
 147             None, [], None, 'US', None, None, 2, False, True,
 148             [
 149                 None, None,
 150                 [2, 1, None, 1,
 151                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 152                  None, [], 4],
 153                 1, [None, None, []], None, None, None, True
 154             ],
 155             username,
 156         ]
 157
 158         lookup_results = req(
 159             self._LOOKUP_URL, lookup_req,
 160             'Looking up account info', 'Unable to look up account info')
 161
 162         if lookup_results is False:
 163             return False
 164
 165         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 166         if not user_hash:
 167             warn('Unable to extract user hash')
 168             return False
 169
 170         challenge_req = [
 171             user_hash,
 172             None, 1, None, [1, None, None, None, [password, None, True]],
 173             [
 174                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 175                 1, [None, None, []], None, None, None, True
 176             ]]
 177
 178         challenge_results = req(
 179             self._CHALLENGE_URL, challenge_req,
 180             'Logging in', 'Unable to log in')
 181
 182         if challenge_results is False:
 183             return
 184
 185         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 186         if login_res:
 187             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 188             warn(
 189                 'Unable to login: %s' % 'Invalid password'
 190                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 191             return False
 192
 193         res = try_get(challenge_results, lambda x: x[0][-1], list)
 194         if not res:
 195             warn('Unable to extract result entry')
 196             return False
 197
 198         login_challenge = try_get(res, lambda x: x[0][0], list)
 199         if login_challenge:
 200             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 201             if challenge_str == 'TWO_STEP_VERIFICATION':
 202                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 203                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 204                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 205                 if status == 'QUOTA_EXCEEDED':
 206                     warn('Exceeded the limit of TFA codes, try later')
 207                     return False
 208
 209                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 210                 if not tl:
 211                     warn('Unable to extract TL')
 212                     return False
 213
 214                 tfa_code = self._get_tfa_info('2-step verification code')
 215
 216                 if not tfa_code:
 217                     warn(
 218                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 219                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 220                     return False
 221
 222                 tfa_code = remove_start(tfa_code, 'G-')
 223
 224                 tfa_req = [
 225                     user_hash, None, 2, None,
 226                     [
 227                         9, None, None, None, None, None, None, None,
 228                         [None, tfa_code, True, 2]
 229                     ]]
 230
 231                 tfa_results = req(
 232                     self._TFA_URL.format(tl), tfa_req,
 233                     'Submitting TFA code', 'Unable to submit TFA code')
 234
 235                 if tfa_results is False:
 236                     return False
 237
 238                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 239                 if tfa_res:
 240                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 241                     warn(
 242                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 243                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 244                     return False
 245
 246                 check_cookie_url = try_get(
 247                     tfa_results, lambda x: x[0][-1][2], compat_str)
 248             else:
 249                 CHALLENGES = {
 250                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 251                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 252                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 253                 }
 254                 challenge = CHALLENGES.get(
 255                     challenge_str,
 256                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 257                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 258                 return False
 259         else:
 260             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 261
 262         if not check_cookie_url:
 263             warn('Unable to extract CheckCookie URL')
 264             return False
 265
 266         check_cookie_results = self._download_webpage(
 267             check_cookie_url, None, 'Checking cookie', fatal=False)
 268
 269         if check_cookie_results is False:
 270             return False
 271
 272         if 'https://myaccount.google.com/' not in check_cookie_results:
 273             warn('Unable to log in')
 274             return False
 275
 276         return True
 277
 278     def _download_webpage_handle(self, *args, **kwargs):
 279         query = kwargs.get('query', {}).copy()
 280         kwargs['query'] = query
 281         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 282             *args, **compat_kwargs(kwargs))
 283
 284     def _get_yt_initial_data(self, video_id, webpage):
 285         config = self._search_regex(
 286             (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
 287              r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
 288             webpage, 'ytInitialData', default=None)
 289         if config:
 290             return self._parse_json(
 291                 uppercase_escape(config), video_id, fatal=False)
 292
 293     def _real_initialize(self):
 294         if self._downloader is None:
 295             return
 296         self._set_language()
 297         if not self._login():
 298             return
 299
    # Base JSON payload for _call_api(): a shallow copy of this dict is
    # updated with the caller's query and sent as the request body.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Matches a ytInitialData assignment (window["ytInitialData"] or a bare
    # ytInitialData) and lazily captures the {...} object literal up to a "}"
    # followed by optional whitespace and ";".  Used by
    # _extract_yt_initial_data().
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
 310
 311     def _call_api(self, ep, query, video_id):
 312         data = self._DEFAULT_API_DATA.copy()
 313         data.update(query)
 314
 315         response = self._download_json(
 316             'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
 317             note='Downloading API JSON', errnote='Unable to download API page',
 318             data=json.dumps(data).encode('utf8'),
 319             headers={'content-type': 'application/json'},
 320             query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
 321
 322         return response
 323
 324     def _extract_yt_initial_data(self, video_id, webpage):
 325         return self._parse_json(
 326             self._search_regex(
 327                 (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
 328                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 329             video_id)
 330
 331
 332 class YoutubeIE(YoutubeBaseInfoExtractor):
 333     IE_DESC = 'YouTube.com'
 334     _VALID_URL = r"""(?x)^
 335                      (
 336                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 337                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
 338                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 339                             (?:www\.)?pwnyoutube\.com/|
 340                             (?:www\.)?hooktube\.com/|
 341                             (?:www\.)?yourepeat\.com/|
 342                             tube\.majestyc\.net/|
 343                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
 344                             (?:(?:www|dev)\.)?invidio\.us/|
 345                             (?:(?:www|no)\.)?invidiou\.sh/|
 346                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
 347                             (?:www\.)?invidious\.kabi\.tk/|
 348                             (?:www\.)?invidious\.13ad\.de/|
 349                             (?:www\.)?invidious\.mastodon\.host/|
 350                             (?:www\.)?invidious\.nixnet\.xyz/|
 351                             (?:www\.)?invidious\.drycat\.fr/|
 352                             (?:www\.)?tube\.poal\.co/|
 353                             (?:www\.)?vid\.wxzm\.sx/|
 354                             (?:www\.)?yewtu\.be/|
 355                             (?:www\.)?yt\.elukerio\.org/|
 356                             (?:www\.)?yt\.lelux\.fi/|
 357                             (?:www\.)?invidious\.ggc-project\.de/|
 358                             (?:www\.)?yt\.maisputain\.ovh/|
 359                             (?:www\.)?invidious\.13ad\.de/|
 360                             (?:www\.)?invidious\.toot\.koeln/|
 361                             (?:www\.)?invidious\.fdn\.fr/|
 362                             (?:www\.)?watch\.nettohikari\.com/|
 363                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
 364                             (?:www\.)?qklhadlycap4cnod\.onion/|
 365                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
 366                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
 367                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
 368                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
 369                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
 370                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
 371                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 372                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 373                          (?:                                                  # the various things that can precede the ID:
 374                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 375                              |(?:                                             # or the v= param in all its forms
 376                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 377                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 378                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 379                                  v=
 380                              )
 381                          ))
 382                          |(?:
 383                             youtu\.be|                                        # just youtu.be/xxxx
 384                             vid\.plus|                                        # or vid.plus/xxxx
 385                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 386                          )/
 387                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 388                          )
 389                      )?                                                       # all until now is optional -> you can pass the naked ID
 390                      (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 391                      (?!.*?\blist=
 392                         (?:
 393                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 394                             WL                                                # WL are handled by the watch later IE
 395                         )
 396                      )
 397                      (?(1).+)?                                                # if we found the ID, everything can follow
 398                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 399     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 400     _PLAYER_INFO_RE = (
 401         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 402         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 403     )
 404     _formats = {
 405         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 406         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 407         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 408         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 409         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 410         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 411         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 412         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 413         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 414         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 415         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 416         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 417         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 418         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 419         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 420         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 421         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 422         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 423
 424
 425         # 3D videos
 426         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 427         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 428         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 429         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 430         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 431         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 432         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 433
 434         # Apple HTTP Live Streaming
 435         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 436         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 437         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 438         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 439         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 440         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 441         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 442         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 443
 444         # DASH mp4 video
 445         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 446         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 447         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 448         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 449         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 450         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 451         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 452         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 453         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 454         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 455         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 456         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 457
 458         # Dash mp4 audio
 459         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 460         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 461         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 462         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 463         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 464         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 465         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 466
 467         # Dash webm
 468         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 469         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 470         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 471         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 472         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 473         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 474         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 475         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 476         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 477         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 478         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 479         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 480         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 481         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 482         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 483         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 484         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 485         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 486         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 487         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 488         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 489         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 490
 491         # Dash webm audio
 492         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 493         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 494
 495         # Dash webm audio with opus inside
 496         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 497         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 498         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 499
 500         # RTMP (unnamed)
 501         '_rtmp': {'protocol': 'rtmp'},
 502
 503         # av01 video only formats sometimes served with "unknown" codecs
 504         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 505         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 506         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 507         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 508     }
 509     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 510
 511     _GEO_BYPASS = False
 512
 513     IE_NAME = 'youtube'
 514     _TESTS = [
 515         {
 516             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 517             'info_dict': {
 518                 'id': 'BaW_jenozKc',
 519                 'ext': 'mp4',
 520                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 521                 'uploader': 'Philipp Hagemeister',
 522                 'uploader_id': 'phihag',
 523                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 524                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 525                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 526                 'upload_date': '20121002',
 527                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 528                 'categories': ['Science & Technology'],
 529                 'tags': ['youtube-dl'],
 530                 'duration': 10,
 531                 'view_count': int,
 532                 'like_count': int,
 533                 'dislike_count': int,
 534                 'start_time': 1,
 535                 'end_time': 9,
 536             }
 537         },
 538         {
 539             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 540             'note': 'Embed-only video (#1746)',
 541             'info_dict': {
 542                 'id': 'yZIXLfi8CZQ',
 543                 'ext': 'mp4',
 544                 'upload_date': '20120608',
 545                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 546                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 547                 'uploader': 'SET India',
 548                 'uploader_id': 'setindia',
 549                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 550                 'age_limit': 18,
 551             }
 552         },
 553         {
 554             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
 555             'note': 'Use the first video ID in the URL',
 556             'info_dict': {
 557                 'id': 'BaW_jenozKc',
 558                 'ext': 'mp4',
 559                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 560                 'uploader': 'Philipp Hagemeister',
 561                 'uploader_id': 'phihag',
 562                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 563                 'upload_date': '20121002',
 564                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 565                 'categories': ['Science & Technology'],
 566                 'tags': ['youtube-dl'],
 567                 'duration': 10,
 568                 'view_count': int,
 569                 'like_count': int,
 570                 'dislike_count': int,
 571             },
 572             'params': {
 573                 'skip_download': True,
 574             },
 575         },
 576         {
 577             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 578             'note': '256k DASH audio (format 141) via DASH manifest',
 579             'info_dict': {
 580                 'id': 'a9LDPn-MO4I',
 581                 'ext': 'm4a',
 582                 'upload_date': '20121002',
 583                 'uploader_id': '8KVIDEO',
 584                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 585                 'description': '',
 586                 'uploader': '8KVIDEO',
 587                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 588             },
 589             'params': {
 590                 'youtube_include_dash_manifest': True,
 591                 'format': '141',
 592             },
 593             'skip': 'format 141 not served anymore',
 594         },
 595         # DASH manifest with encrypted signature
 596         {
 597             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 598             'info_dict': {
 599                 'id': 'IB3lcPjvWLA',
 600                 'ext': 'm4a',
 601                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 602                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 603                 'duration': 244,
 604                 'uploader': 'AfrojackVEVO',
 605                 'uploader_id': 'AfrojackVEVO',
 606                 'upload_date': '20131011',
 607             },
 608             'params': {
 609                 'youtube_include_dash_manifest': True,
 610                 'format': '141/bestaudio[ext=m4a]',
 611             },
 612         },
 613         # Controversy video
 614         {
 615             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 616             'info_dict': {
 617                 'id': 'T4XJQO3qol8',
 618                 'ext': 'mp4',
 619                 'duration': 219,
 620                 'upload_date': '20100909',
 621                 'uploader': 'Amazing Atheist',
 622                 'uploader_id': 'TheAmazingAtheist',
 623                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 624                 'title': 'Burning Everyone\'s Koran',
 625                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 626             }
 627         },
 628         # Normal age-gate video (embed allowed)
 629         {
 630             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 631             'info_dict': {
 632                 'id': 'HtVdAasjOgU',
 633                 'ext': 'mp4',
 634                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 635                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 636                 'duration': 142,
 637                 'uploader': 'The Witcher',
 638                 'uploader_id': 'WitcherGame',
 639                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 640                 'upload_date': '20140605',
 641                 'age_limit': 18,
 642             },
 643         },
 644         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 645         # YouTube Red ad is not captured for creator
 646         {
 647             'url': '__2ABJjxzNo',
 648             'info_dict': {
 649                 'id': '__2ABJjxzNo',
 650                 'ext': 'mp4',
 651                 'duration': 266,
 652                 'upload_date': '20100430',
 653                 'uploader_id': 'deadmau5',
 654                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 655                 'creator': 'Dada Life, deadmau5',
 656                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 657                 'uploader': 'deadmau5',
 658                 'title': 'Deadmau5 - Some Chords (HD)',
 659                 'alt_title': 'This Machine Kills Some Chords',
 660             },
 661             'expected_warnings': [
 662                 'DASH manifest missing',
 663             ]
 664         },
 665         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 666         {
 667             'url': 'lqQg6PlCWgI',
 668             'info_dict': {
 669                 'id': 'lqQg6PlCWgI',
 670                 'ext': 'mp4',
 671                 'duration': 6085,
 672                 'upload_date': '20150827',
 673                 'uploader_id': 'olympic',
 674                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 675                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 676                 'uploader': 'Olympic',
 677                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 678             },
 679             'params': {
 680                 'skip_download': 'requires avconv',
 681             }
 682         },
 683         # Non-square pixels
 684         {
 685             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 686             'info_dict': {
 687                 'id': '_b-2C3KPAM0',
 688                 'ext': 'mp4',
 689                 'stretched_ratio': 16 / 9.,
 690                 'duration': 85,
 691                 'upload_date': '20110310',
 692                 'uploader_id': 'AllenMeow',
 693                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 694                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 695                 'uploader': '孫ᄋᄅ',
 696                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 697             },
 698         },
 699         # url_encoded_fmt_stream_map is empty string
 700         {
 701             'url': 'qEJwOuvDf7I',
 702             'info_dict': {
 703                 'id': 'qEJwOuvDf7I',
 704                 'ext': 'webm',
 705                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 706                 'description': '',
 707                 'upload_date': '20150404',
 708                 'uploader_id': 'spbelect',
 709                 'uploader': 'Наблюдатели Петербурга',
 710             },
 711             'params': {
 712                 'skip_download': 'requires avconv',
 713             },
 714             'skip': 'This live event has ended.',
 715         },
 716         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 717         {
 718             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 719             'info_dict': {
 720                 'id': 'FIl7x6_3R5Y',
 721                 'ext': 'webm',
 722                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 723                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 724                 'duration': 220,
 725                 'upload_date': '20150625',
 726                 'uploader_id': 'dorappi2000',
 727                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 728                 'uploader': 'dorappi2000',
 729                 'formats': 'mincount:31',
 730             },
 731             'skip': 'not actual anymore',
 732         },
 733         # DASH manifest with segment_list
 734         {
 735             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 736             'md5': '8ce563a1d667b599d21064e982ab9e31',
 737             'info_dict': {
 738                 'id': 'CsmdDsKjzN8',
 739                 'ext': 'mp4',
 740                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 741                 'uploader': 'Airtek',
 742                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 743                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 744                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 745             },
 746             'params': {
 747                 'youtube_include_dash_manifest': True,
 748                 'format': '135',  # bestvideo
 749             },
 750             'skip': 'This live event has ended.',
 751         },
 752         {
 753             # Multifeed videos (multiple cameras), URL is for Main Camera
 754             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 755             'info_dict': {
 756                 'id': 'jqWvoWXjCVs',
 757                 'title': 'teamPGP: Rocket League Noob Stream',
 758                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 759             },
 760             'playlist': [{
 761                 'info_dict': {
 762                     'id': 'jqWvoWXjCVs',
 763                     'ext': 'mp4',
 764                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 765                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 766                     'duration': 7335,
 767                     'upload_date': '20150721',
 768                     'uploader': 'Beer Games Beer',
 769                     'uploader_id': 'beergamesbeer',
 770                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 771                     'license': 'Standard YouTube License',
 772                 },
 773             }, {
 774                 'info_dict': {
 775                     'id': '6h8e8xoXJzg',
 776                     'ext': 'mp4',
 777                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 778                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 779                     'duration': 7337,
 780                     'upload_date': '20150721',
 781                     'uploader': 'Beer Games Beer',
 782                     'uploader_id': 'beergamesbeer',
 783                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 784                     'license': 'Standard YouTube License',
 785                 },
 786             }, {
 787                 'info_dict': {
 788                     'id': 'PUOgX5z9xZw',
 789                     'ext': 'mp4',
 790                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 791                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 792                     'duration': 7337,
 793                     'upload_date': '20150721',
 794                     'uploader': 'Beer Games Beer',
 795                     'uploader_id': 'beergamesbeer',
 796                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 797                     'license': 'Standard YouTube License',
 798                 },
 799             }, {
 800                 'info_dict': {
 801                     'id': 'teuwxikvS5k',
 802                     'ext': 'mp4',
 803                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 804                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 805                     'duration': 7334,
 806                     'upload_date': '20150721',
 807                     'uploader': 'Beer Games Beer',
 808                     'uploader_id': 'beergamesbeer',
 809                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 810                     'license': 'Standard YouTube License',
 811                 },
 812             }],
 813             'params': {
 814                 'skip_download': True,
 815             },
 816             'skip': 'This video is not available.',
 817         },
 818         {
 819             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 820             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 821             'info_dict': {
 822                 'id': 'gVfLd0zydlo',
 823                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 824             },
 825             'playlist_count': 2,
 826             'skip': 'Not multifeed anymore',
 827         },
 828         {
 829             'url': 'https://vid.plus/FlRa-iH7PGw',
 830             'only_matching': True,
 831         },
 832         {
 833             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 834             'only_matching': True,
 835         },
 836         {
 837             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 838             # Also tests cut-off URL expansion in video description (see
 839             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 840             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 841             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 842             'info_dict': {
 843                 'id': 'lsguqyKfVQg',
 844                 'ext': 'mp4',
 845                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 846                 'alt_title': 'Dark Walk - Position Music',
 847                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 848                 'duration': 133,
 849                 'upload_date': '20151119',
 850                 'uploader_id': 'IronSoulElf',
 851                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 852                 'uploader': 'IronSoulElf',
 853                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 854                 'track': 'Dark Walk - Position Music',
 855                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 856                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
 857             },
 858             'params': {
 859                 'skip_download': True,
 860             },
 861         },
 862         {
 863             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 864             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 865             'only_matching': True,
 866         },
 867         {
 868             # Video with yt:stretch=17:0
 869             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 870             'info_dict': {
 871                 'id': 'Q39EVAstoRM',
 872                 'ext': 'mp4',
 873                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 874                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 875                 'upload_date': '20151107',
 876                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 877                 'uploader': 'CH GAMER DROID',
 878             },
 879             'params': {
 880                 'skip_download': True,
 881             },
 882             'skip': 'This video does not exist.',
 883         },
 884         {
 885             # Video licensed under Creative Commons
 886             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 887             'info_dict': {
 888                 'id': 'M4gD1WSo5mA',
 889                 'ext': 'mp4',
 890                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 891                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 892                 'duration': 721,
 893                 'upload_date': '20150127',
 894                 'uploader_id': 'BerkmanCenter',
 895                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 896                 'uploader': 'The Berkman Klein Center for Internet & Society',
 897                 'license': 'Creative Commons Attribution license (reuse allowed)',
 898             },
 899             'params': {
 900                 'skip_download': True,
 901             },
 902         },
 903         {
 904             # Channel-like uploader_url
 905             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 906             'info_dict': {
 907                 'id': 'eQcmzGIKrzg',
 908                 'ext': 'mp4',
 909                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 910                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 911                 'duration': 4060,
 912                 'upload_date': '20151119',
 913                 'uploader': 'Bernie Sanders',
 914                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 915                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 916                 'license': 'Creative Commons Attribution license (reuse allowed)',
 917             },
 918             'params': {
 919                 'skip_download': True,
 920             },
 921         },
 922         {
 923             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 924             'only_matching': True,
 925         },
 926         {
 927             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
 928             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 929             'only_matching': True,
 930         },
 931         {
 932             # Rental video preview
 933             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 934             'info_dict': {
 935                 'id': 'uGpuVWrhIzE',
 936                 'ext': 'mp4',
 937                 'title': 'Piku - Trailer',
 938                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 939                 'upload_date': '20150811',
 940                 'uploader': 'FlixMatrix',
 941                 'uploader_id': 'FlixMatrixKaravan',
 942                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
 943                 'license': 'Standard YouTube License',
 944             },
 945             'params': {
 946                 'skip_download': True,
 947             },
 948             'skip': 'This video is not available.',
 949         },
 950         {
 951             # YouTube Red video with episode data
 952             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
 953             'info_dict': {
 954                 'id': 'iqKdEhx-dD4',
 955                 'ext': 'mp4',
 956                 'title': 'Isolation - Mind Field (Ep 1)',
 957                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
 958                 'duration': 2085,
 959                 'upload_date': '20170118',
 960                 'uploader': 'Vsauce',
 961                 'uploader_id': 'Vsauce',
 962                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
 963                 'series': 'Mind Field',
 964                 'season_number': 1,
 965                 'episode_number': 1,
 966             },
 967             'params': {
 968                 'skip_download': True,
 969             },
 970             'expected_warnings': [
 971                 'Skipping DASH manifest',
 972             ],
 973         },
 974         {
 975             # The following content has been identified by the YouTube community
 976             # as inappropriate or offensive to some audiences.
 977             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
 978             'info_dict': {
 979                 'id': '6SJNVb0GnPI',
 980                 'ext': 'mp4',
 981                 'title': 'Race Differences in Intelligence',
 982                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
 983                 'duration': 965,
 984                 'upload_date': '20140124',
 985                 'uploader': 'New Century Foundation',
 986                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
 987                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
 988             },
 989             'params': {
 990                 'skip_download': True,
 991             },
 992         },
 993         {
 994             # itag 212
 995             'url': '1t24XAntNCY',
 996             'only_matching': True,
 997         },
 998         {
 999             # geo restricted to JP
1000             'url': 'sJL6WA-aGkQ',
1001             'only_matching': True,
1002         },
1003         {
1004             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1005             'only_matching': True,
1006         },
1007         {
1008             # DRM protected
1009             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1010             'only_matching': True,
1011         },
1012         {
1013             # Video with unsupported adaptive stream type formats
1014             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1015             'info_dict': {
1016                 'id': 'Z4Vy8R84T1U',
1017                 'ext': 'mp4',
1018                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1019                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1020                 'duration': 433,
1021                 'upload_date': '20130923',
1022                 'uploader': 'Amelia Putri Harwita',
1023                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1024                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1025                 'formats': 'maxcount:10',
1026             },
1027             'params': {
1028                 'skip_download': True,
1029                 'youtube_include_dash_manifest': False,
1030             },
1031             'skip': 'not actual anymore',
1032         },
1033         {
1034             # Youtube Music Auto-generated description
1035             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1036             'info_dict': {
1037                 'id': 'MgNrAu2pzNs',
1038                 'ext': 'mp4',
1039                 'title': 'Voyeur Girl',
1040                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1041                 'upload_date': '20190312',
1042                 'uploader': 'Stephen - Topic',
1043                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1044                 'artist': 'Stephen',
1045                 'track': 'Voyeur Girl',
1046                 'album': 'it\'s too much love to know my dear',
1047                 'release_date': '20190313',
1048                 'release_year': 2019,
1049             },
1050             'params': {
1051                 'skip_download': True,
1052             },
1053         },
1054         {
1055             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1056             'only_matching': True,
1057         },
1058         {
1059             # invalid -> valid video id redirection
1060             'url': 'DJztXj2GPfl',
1061             'info_dict': {
1062                 'id': 'DJztXj2GPfk',
1063                 'ext': 'mp4',
1064                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1065                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1066                 'upload_date': '20090125',
1067                 'uploader': 'Prochorowka',
1068                 'uploader_id': 'Prochorowka',
1069                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1070                 'artist': 'Panjabi MC',
1071                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1072                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1073             },
1074             'params': {
1075                 'skip_download': True,
1076             },
1077         },
1078         {
1079             # empty description results in an empty string
1080             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1081             'info_dict': {
1082                 'id': 'x41yOUIvK2k',
1083                 'ext': 'mp4',
1084                 'title': 'IMG 3456',
1085                 'description': '',
1086                 'upload_date': '20170613',
1087                 'uploader_id': 'ElevageOrVert',
1088                 'uploader': 'ElevageOrVert',
1089             },
1090             'params': {
1091                 'skip_download': True,
1092             },
1093         },
1094         {
1095             # with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
1096             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1097             'info_dict': {
1098                 'id': 'CHqg6qOn4no',
1099                 'ext': 'mp4',
1100                 'title': 'Part 77   Sort a list of simple types in c#',
1101                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1102                 'upload_date': '20130831',
1103                 'uploader_id': 'kudvenkat',
1104                 'uploader': 'kudvenkat',
1105             },
1106             'params': {
1107                 'skip_download': True,
1108             },
1109         },
1110     ]
1111
    def __init__(self, *args, **kwargs):
        """Set up the extractor with an empty signature-function cache.

        All positional and keyword arguments are passed through unchanged
        to the parent class constructor.
        """
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # In-memory cache of signature functions; _decrypt_signature keys it
        # by (player_url, signature layout id)
        self._player_cache = {}
1115
1116     def report_video_info_webpage_download(self, video_id):
1117         """Report attempt to download video info webpage."""
1118         self.to_screen('%s: Downloading video info webpage' % video_id)
1119
1120     def report_information_extraction(self, video_id):
1121         """Report attempt to extract video information."""
1122         self.to_screen('%s: Extracting video information' % video_id)
1123
1124     def report_unavailable_format(self, video_id, format):
1125         """Report extracted video URL."""
1126         self.to_screen('%s: Format %s not available' % (video_id, format))
1127
1128     def report_rtmp_download(self):
1129         """Indicate the download will use the RTMP protocol."""
1130         self.to_screen('RTMP download detected')
1131
1132     def _signature_cache_id(self, example_sig):
1133         """ Return a string representation of a signature """
1134         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1135
1136     @classmethod
1137     def _extract_player_info(cls, player_url):
1138         for player_re in cls._PLAYER_INFO_RE:
1139             id_m = re.search(player_re, player_url)
1140             if id_m:
1141                 break
1142         else:
1143             raise ExtractorError('Cannot identify player %r' % player_url)
1144         return id_m.group('ext'), id_m.group('id')
1145
1146     def _extract_signature_function(self, video_id, player_url, example_sig):
1147         player_type, player_id = self._extract_player_info(player_url)
1148
1149         # Read from filesystem cache
1150         func_id = '%s_%s_%s' % (
1151             player_type, player_id, self._signature_cache_id(example_sig))
1152         assert os.path.basename(func_id) == func_id
1153
1154         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1155         if cache_spec is not None:
1156             return lambda s: ''.join(s[i] for i in cache_spec)
1157
1158         download_note = (
1159             'Downloading player %s' % player_url
1160             if self._downloader.params.get('verbose') else
1161             'Downloading %s player %s' % (player_type, player_id)
1162         )
1163         if player_type == 'js':
1164             code = self._download_webpage(
1165                 player_url, video_id,
1166                 note=download_note,
1167                 errnote='Download of %s failed' % player_url)
1168             res = self._parse_sig_js(code)
1169         elif player_type == 'swf':
1170             urlh = self._request_webpage(
1171                 player_url, video_id,
1172                 note=download_note,
1173                 errnote='Download of %s failed' % player_url)
1174             code = urlh.read()
1175             res = self._parse_sig_swf(code)
1176         else:
1177             assert False, 'Invalid player type %r' % player_type
1178
1179         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1180         cache_res = res(test_string)
1181         cache_spec = [ord(c) for c in cache_res]
1182
1183         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1184         return res
1185
1186     def _print_sig_code(self, func, example_sig):
1187         def gen_sig_code(idxs):
1188             def _genslice(start, end, step):
1189                 starts = '' if start == 0 else str(start)
1190                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1191                 steps = '' if step == 1 else (':%d' % step)
1192                 return 's[%s%s%s]' % (starts, ends, steps)
1193
1194             step = None
1195             # Quelch pyflakes warnings - start will be set when step is set
1196             start = '(Never used)'
1197             for i, prev in zip(idxs[1:], idxs[:-1]):
1198                 if step is not None:
1199                     if i - prev == step:
1200                         continue
1201                     yield _genslice(start, prev, step)
1202                     step = None
1203                     continue
1204                 if i - prev in [-1, 1]:
1205                     step = i - prev
1206                     start = prev
1207                     continue
1208                 else:
1209                     yield 's[%d]' % prev
1210             if step is None:
1211                 yield 's[%d]' % i
1212             else:
1213                 yield _genslice(start, i, step)
1214
1215         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1216         cache_res = func(test_string)
1217         cache_spec = [ord(c) for c in cache_res]
1218         expr_code = ' + '.join(gen_sig_code(cache_spec))
1219         signature_id_tuple = '(%s)' % (
1220             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1221         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1222                 '    return %s\n') % (signature_id_tuple, expr_code)
1223         self.to_screen('Extracted signature function:\n' + code)
1224
1225     def _parse_sig_js(self, jscode):
1226         funcname = self._search_regex(
1227             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1228              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1229              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1230              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1231              # Obsolete patterns
1232              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1233              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1234              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1235              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1236              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1237              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1238              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1239              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1240             jscode, 'Initial JS player signature function name', group='sig')
1241
1242         jsi = JSInterpreter(jscode)
1243         initial_function = jsi.extract_function(funcname)
1244         return lambda s: initial_function([s])
1245
1246     def _parse_sig_swf(self, file_contents):
1247         swfi = SWFInterpreter(file_contents)
1248         TARGET_CLASSNAME = 'SignatureDecipher'
1249         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1250         initial_function = swfi.extract_function(searched_class, 'decipher')
1251         return lambda s: initial_function([s])
1252
1253     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1254         """Turn the encrypted s field into a working signature"""
1255
1256         if player_url is None:
1257             raise ExtractorError('Cannot decrypt signature without player_url')
1258
1259         if player_url.startswith('//'):
1260             player_url = 'https:' + player_url
1261         elif not re.match(r'https?://', player_url):
1262             player_url = compat_urlparse.urljoin(
1263                 'https://www.youtube.com', player_url)
1264         try:
1265             player_id = (player_url, self._signature_cache_id(s))
1266             if player_id not in self._player_cache:
1267                 func = self._extract_signature_function(
1268                     video_id, player_url, s
1269                 )
1270                 self._player_cache[player_id] = func
1271             func = self._player_cache[player_id]
1272             if self._downloader.params.get('youtube_print_sig_code'):
1273                 self._print_sig_code(func, s)
1274             return func(s)
1275         except Exception as e:
1276             tb = traceback.format_exc()
1277             raise ExtractorError(
1278                 'Signature extraction failed: ' + tb, cause=e)
1279
1280     def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1281         try:
1282             subs_doc = self._download_xml(
1283                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1284                 video_id, note=False)
1285         except ExtractorError as err:
1286             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1287             return {}
1288
1289         sub_lang_list = {}
1290         for track in subs_doc.findall('track'):
1291             lang = track.attrib['lang_code']
1292             if lang in sub_lang_list:
1293                 continue
1294             sub_formats = []
1295             for ext in self._SUBTITLE_FORMATS:
1296                 params = compat_urllib_parse_urlencode({
1297                     'lang': lang,
1298                     'v': video_id,
1299                     'fmt': ext,
1300                     'name': track.attrib['name'].encode('utf-8'),
1301                 })
1302                 sub_formats.append({
1303                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1304                     'ext': ext,
1305                 })
1306             sub_lang_list[lang] = sub_formats
1307         if has_live_chat_replay:
1308             sub_lang_list['live_chat'] = [
1309                 {
1310                     'video_id': video_id,
1311                     'ext': 'json',
1312                     'protocol': 'youtube_live_chat_replay',
1313                 },
1314             ]
1315         if not sub_lang_list:
1316             self._downloader.report_warning('video doesn\'t have subtitles')
1317             return {}
1318         return sub_lang_list
1319
1320     def _get_ytplayer_config(self, video_id, webpage):
1321         patterns = (
1322             # User data may contain arbitrary character sequences that may affect
1323             # JSON extraction with regex, e.g. when '};' is contained the second
1324             # regex won't capture the whole JSON. Yet working around by trying more
1325             # concrete regex first keeping in mind proper quoted string handling
1326             # to be implemented in future that will replace this workaround (see
1327             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1328             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1329             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1330             r';ytplayer\.config\s*=\s*({.+?});',
1331         )
1332         config = self._search_regex(
1333             patterns, webpage, 'ytplayer.config', default=None)
1334         if config:
1335             return self._parse_json(
1336                 uppercase_escape(config), video_id, fatal=False)
1337
1338     def _get_music_metadata_from_yt_initial(self, yt_initial):
1339         music_metadata = []
1340         key_map = {
1341             'Album': 'album',
1342             'Artist': 'artist',
1343             'Song': 'track'
1344         }
1345         contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1346         if type(contents) is list:
1347             for content in contents:
1348                 music_track = {}
1349                 if type(content) is not dict:
1350                     continue
1351                 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1352                 if type(videoSecondaryInfoRenderer) is not dict:
1353                     continue
1354                 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1355                 if type(rows) is not list:
1356                     continue
1357                 for row in rows:
1358                     metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1359                     if type(metadataRowRenderer) is not dict:
1360                         continue
1361                     key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1362                     value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1363                         try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1364                     if type(key) is not str or type(value) is not str:
1365                         continue
1366                     if key in key_map:
1367                         if key_map[key] in music_track:
1368                             # we've started on a new track
1369                             music_metadata.append(music_track)
1370                             music_track = {}
1371                         music_track[key_map[key]] = value
1372                 if len(music_track.keys()):
1373                     music_metadata.append(music_track)
1374         return music_metadata
1375
    def _get_automatic_captions(self, video_id, webpage):
        """Return the automatic (translated) captions available for a video.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Three sources are tried in order: the 'ttsurl' player argument, the
        'player_response' player argument and finally the legacy
        'caption_tracks' / 'caption_translation_languages' player arguments.

        Returns a dict mapping each target language code to a list of
        {'url', 'ext'} dicts, one per entry of self._SUBTITLE_FORMATS.
        An empty dict is returned (after a warning) when the player config
        is missing or the captions cannot be determined.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # Get the list of available tracks and translation targets
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # Build one URL per (target language, subtitle format) pair,
                # always translating from the first listed track
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Derive the caption URLs from sub_url by overriding its
                # 'tlang' and 'fmt' query parameters for every language and
                # subtitle format; the other query parameters are kept.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        # sub_url is rebound here; the original URL has
                        # already been parsed into parsed_sub_url above
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    # The first caption track serves as the translation source
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1477
1478     def _mark_watched(self, video_id, video_info, player_response):
1479         playback_url = url_or_none(try_get(
1480             player_response,
1481             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1482             video_info, lambda x: x['videostats_playback_base_url'][0]))
1483         if not playback_url:
1484             return
1485         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1486         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1487
1488         # cpn generation algorithm is reverse engineered from base.js.
1489         # In fact it works even with dummy cpn.
1490         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1491         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1492
1493         qs.update({
1494             'ver': ['2'],
1495             'cpn': [cpn],
1496         })
1497         playback_url = compat_urlparse.urlunparse(
1498             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1499
1500         self._download_webpage(
1501             playback_url, video_id, 'Marking watched',
1502             'Unable to mark watched', fatal=False)
1503
1504     @staticmethod
1505     def _extract_urls(webpage):
1506         # Embedded YouTube player
1507         entries = [
1508             unescapeHTML(mobj.group('url'))
1509             for mobj in re.finditer(r'''(?x)
1510             (?:
1511                 <iframe[^>]+?src=|
1512                 data-video-url=|
1513                 <embed[^>]+?src=|
1514                 embedSWF\(?:\s*|
1515                 <object[^>]+data=|
1516                 new\s+SWFObject\(
1517             )
1518             (["\'])
1519                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1520                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1521             \1''', webpage)]
1522
1523         # lazyYT YouTube embed
1524         entries.extend(list(map(
1525             unescapeHTML,
1526             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1527
1528         # Wordpress "YouTube Video Importer" plugin
1529         matches = re.findall(r'''(?x)<div[^>]+
1530             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1531             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1532         entries.extend(m[-1] for m in matches)
1533
1534         return entries
1535
1536     @staticmethod
1537     def _extract_url(webpage):
1538         urls = YoutubeIE._extract_urls(webpage)
1539         return urls[0] if urls else None
1540
1541     @classmethod
1542     def extract_id(cls, url):
1543         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1544         if mobj is None:
1545             raise ExtractorError('Invalid URL: %s' % url)
1546         video_id = mobj.group(2)
1547         return video_id
1548
1549     def _extract_chapters_from_json(self, webpage, video_id, duration):
1550         if not webpage:
1551             return
1552         data = self._extract_yt_initial_data(video_id, webpage)
1553         if not data or not isinstance(data, dict):
1554             return
1555         chapters_list = try_get(
1556             data,
1557             lambda x: x['playerOverlays']
1558                        ['playerOverlayRenderer']
1559                        ['decoratedPlayerBarRenderer']
1560                        ['decoratedPlayerBarRenderer']
1561                        ['playerBar']
1562                        ['chapteredPlayerBarRenderer']
1563                        ['chapters'],
1564             list)
1565         if not chapters_list:
1566             return
1567
1568         def chapter_time(chapter):
1569             return float_or_none(
1570                 try_get(
1571                     chapter,
1572                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1573                     int),
1574                 scale=1000)
1575         chapters = []
1576         for next_num, chapter in enumerate(chapters_list, start=1):
1577             start_time = chapter_time(chapter)
1578             if start_time is None:
1579                 continue
1580             end_time = (chapter_time(chapters_list[next_num])
1581                         if next_num < len(chapters_list) else duration)
1582             if end_time is None:
1583                 continue
1584             title = try_get(
1585                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1586                 compat_str)
1587             chapters.append({
1588                 'start_time': start_time,
1589                 'end_time': end_time,
1590                 'title': title,
1591             })
1592         return chapters
1593
1594     @staticmethod
1595     def _extract_chapters_from_description(description, duration):
1596         if not description:
1597             return None
1598         chapter_lines = re.findall(
1599             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1600             description)
1601         if not chapter_lines:
1602             return None
1603         chapters = []
1604         for next_num, (chapter_line, time_point) in enumerate(
1605                 chapter_lines, start=1):
1606             start_time = parse_duration(time_point)
1607             if start_time is None:
1608                 continue
1609             if start_time > duration:
1610                 break
1611             end_time = (duration if next_num == len(chapter_lines)
1612                         else parse_duration(chapter_lines[next_num][1]))
1613             if end_time is None:
1614                 continue
1615             if end_time > duration:
1616                 end_time = duration
1617             if start_time > end_time:
1618                 break
1619             chapter_title = re.sub(
1620                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1621             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1622             chapters.append({
1623                 'start_time': start_time,
1624                 'end_time': end_time,
1625                 'title': chapter_title,
1626             })
1627         return chapters
1628
1629     def _extract_chapters(self, webpage, description, video_id, duration):
1630         return (self._extract_chapters_from_json(webpage, video_id, duration)
1631                 or self._extract_chapters_from_description(description, duration))
1632
1633     def _real_extract(self, url):
1634         url, smuggled_data = unsmuggle_url(url, {})
1635
1636         proto = (
1637             'http' if self._downloader.params.get('prefer_insecure', False)
1638             else 'https')
1639
1640         start_time = None
1641         end_time = None
1642         parsed_url = compat_urllib_parse_urlparse(url)
1643         for component in [parsed_url.fragment, parsed_url.query]:
1644             query = compat_parse_qs(component)
1645             if start_time is None and 't' in query:
1646                 start_time = parse_duration(query['t'][0])
1647             if start_time is None and 'start' in query:
1648                 start_time = parse_duration(query['start'][0])
1649             if end_time is None and 'end' in query:
1650                 end_time = parse_duration(query['end'][0])
1651
1652         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1653         mobj = re.search(self._NEXT_URL_RE, url)
1654         if mobj:
1655             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1656         video_id = self.extract_id(url)
1657
1658         # Get video webpage
1659         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1660         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1661
1662         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1663         video_id = qs.get('v', [None])[0] or video_id
1664
1665         # Attempt to extract SWF player URL
1666         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1667         if mobj is not None:
1668             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1669         else:
1670             player_url = None
1671
1672         dash_mpds = []
1673
1674         def add_dash_mpd(video_info):
1675             dash_mpd = video_info.get('dashmpd')
1676             if dash_mpd and dash_mpd[0] not in dash_mpds:
1677                 dash_mpds.append(dash_mpd[0])
1678
1679         def add_dash_mpd_pr(pl_response):
1680             dash_mpd = url_or_none(try_get(
1681                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1682                 compat_str))
1683             if dash_mpd and dash_mpd not in dash_mpds:
1684                 dash_mpds.append(dash_mpd)
1685
1686         is_live = None
1687         view_count = None
1688
1689         def extract_view_count(v_info):
1690             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1691
1692         def extract_player_response(player_response, video_id):
1693             pl_response = str_or_none(player_response)
1694             if not pl_response:
1695                 return
1696             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1697             if isinstance(pl_response, dict):
1698                 add_dash_mpd_pr(pl_response)
1699                 return pl_response
1700
1701         def extract_embedded_config(embed_webpage, video_id):
1702             embedded_config = self._search_regex(
1703                 r'setConfig\(({.*})\);',
1704                 embed_webpage, 'ytInitialData', default=None)
1705             if embedded_config:
1706                 return embedded_config
1707
1708         player_response = {}
1709
1710         # Get video info
1711         video_info = {}
1712         embed_webpage = None
1713         if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1714                 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1715             cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1716             age_gate = True
1717             # We simulate the access to the video from www.youtube.com/v/{video_id}
1718             # this can be viewed without login into Youtube
1719             url = proto + '://www.youtube.com/embed/%s' % video_id
1720             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1721             ext = extract_embedded_config(embed_webpage, video_id)
1722             # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1723             playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1724             if not playable_in_embed:
1725                 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1726                 playable_in_embed = ''
1727             else:
1728                 playable_in_embed = playable_in_embed.group('playableinEmbed')
1729             # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1730             # if re.search(r'player-unavailable">', embed_webpage) is not None:
1731             if playable_in_embed == 'false':
1732                 '''
1733                 # TODO apply this patch when Support for Python 2.6(!) and above drops
1734                 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1735                         or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1736                 '''
1737                 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1738                         or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1739                     age_gate = False
1740                     # Try looking directly into the video webpage
1741                     ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1742                     if ytplayer_config:
1743                         args = ytplayer_config.get("args")
1744                         if args is not None:
1745                             if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1746                                 # Convert to the same format returned by compat_parse_qs
1747                                 video_info = dict((k, [v]) for k, v in args.items())
1748                                 add_dash_mpd(video_info)
1749                             # Rental video is not rented but preview is available (e.g.
1750                             # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1751                             # https://github.com/ytdl-org/youtube-dl/issues/10532)
1752                             if not video_info and args.get('ypc_vid'):
1753                                 return self.url_result(
1754                                     args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1755                             if args.get('livestream') == '1' or args.get('live_playback') == 1:
1756                                 is_live = True
1757                             if not player_response:
1758                                 player_response = extract_player_response(args.get('player_response'), video_id)
1759                         elif not player_response:
1760                             player_response = ytplayer_config
1761                     if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1762                         add_dash_mpd_pr(player_response)
1763                 else:
1764                     raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1765             else:
1766                 data = compat_urllib_parse_urlencode({
1767                     'video_id': video_id,
1768                     'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1769                     'sts': self._search_regex(
1770                         r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1771                 })
1772                 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1773                 try:
1774                     video_info_webpage = self._download_webpage(
1775                         video_info_url, video_id,
1776                         note='Refetching age-gated info webpage',
1777                         errnote='unable to download video info webpage')
1778                 except ExtractorError:
1779                     video_info_webpage = None
1780                 if video_info_webpage:
1781                     video_info = compat_parse_qs(video_info_webpage)
1782                     pl_response = video_info.get('player_response', [None])[0]
1783                     player_response = extract_player_response(pl_response, video_id)
1784                     add_dash_mpd(video_info)
1785                     view_count = extract_view_count(video_info)
1786         else:
1787             age_gate = False
1788             # Try looking directly into the video webpage
1789             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1790             if ytplayer_config:
1791                 args = ytplayer_config.get('args', {})
1792                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1793                     # Convert to the same format returned by compat_parse_qs
1794                     video_info = dict((k, [v]) for k, v in args.items())
1795                     add_dash_mpd(video_info)
1796                 # Rental video is not rented but preview is available (e.g.
1797                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1798                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1799                 if not video_info and args.get('ypc_vid'):
1800                     return self.url_result(
1801                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1802                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1803                     is_live = True
1804                 if not player_response:
1805                     player_response = extract_player_response(args.get('player_response'), video_id)
1806             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1807                 add_dash_mpd_pr(player_response)
1808
1809         if not video_info and not player_response:
1810             player_response = extract_player_response(
1811                 self._search_regex(
1812                     r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
1813                     'initial player response', default='{}'),
1814                 video_id)
1815
1816         def extract_unavailable_message():
1817             messages = []
1818             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1819                 msg = self._html_search_regex(
1820                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1821                     video_webpage, 'unavailable %s' % kind, default=None)
1822                 if msg:
1823                     messages.append(msg)
1824             if messages:
1825                 return '\n'.join(messages)
1826
1827         if not video_info and not player_response:
1828             unavailable_message = extract_unavailable_message()
1829             if not unavailable_message:
1830                 unavailable_message = 'Unable to extract video data'
1831             raise ExtractorError(
1832                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1833
1834         if not isinstance(video_info, dict):
1835             video_info = {}
1836
1837         video_details = try_get(
1838             player_response, lambda x: x['videoDetails'], dict) or {}
1839
1840         microformat = try_get(
1841             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1842
1843         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1844         if not video_title:
1845             self._downloader.report_warning('Unable to extract video title')
1846             video_title = '_'
1847
1848         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1849         if video_description:
1850
1851             def replace_url(m):
1852                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1853                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1854                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1855                     qs = compat_parse_qs(parsed_redir_url.query)
1856                     q = qs.get('q')
1857                     if q and q[0]:
1858                         return q[0]
1859                 return redir_url
1860
1861             description_original = video_description = re.sub(r'''(?x)
1862                 <a\s+
1863                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1864                     (?:title|href)="([^"]+)"\s+
1865                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1866                     class="[^"]*"[^>]*>
1867                 [^<]+\.{3}\s*
1868                 </a>
1869             ''', replace_url, video_description)
1870             video_description = clean_html(video_description)
1871         else:
1872             video_description = video_details.get('shortDescription')
1873             if video_description is None:
1874                 video_description = self._html_search_meta('description', video_webpage)
1875
1876         if not smuggled_data.get('force_singlefeed', False):
1877             if not self._downloader.params.get('noplaylist'):
1878                 multifeed_metadata_list = try_get(
1879                     player_response,
1880                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1881                     compat_str) or try_get(
1882                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1883                 if multifeed_metadata_list:
1884                     entries = []
1885                     feed_ids = []
1886                     for feed in multifeed_metadata_list.split(','):
1887                         # Unquote should take place before split on comma (,) since textual
1888                         # fields may contain comma as well (see
1889                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1890                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1891
1892                         def feed_entry(name):
1893                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1894
1895                         feed_id = feed_entry('id')
1896                         if not feed_id:
1897                             continue
1898                         feed_title = feed_entry('title')
1899                         title = video_title
1900                         if feed_title:
1901                             title += ' (%s)' % feed_title
1902                         entries.append({
1903                             '_type': 'url_transparent',
1904                             'ie_key': 'Youtube',
1905                             'url': smuggle_url(
1906                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1907                                 {'force_singlefeed': True}),
1908                             'title': title,
1909                         })
1910                         feed_ids.append(feed_id)
1911                     self.to_screen(
1912                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1913                         % (', '.join(feed_ids), video_id))
1914                     return self.playlist_result(entries, video_id, video_title, video_description)
1915             else:
1916                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1917
1918         if view_count is None:
1919             view_count = extract_view_count(video_info)
1920         if view_count is None and video_details:
1921             view_count = int_or_none(video_details.get('viewCount'))
1922         if view_count is None and microformat:
1923             view_count = int_or_none(microformat.get('viewCount'))
1924
1925         if is_live is None:
1926             is_live = bool_or_none(video_details.get('isLive'))
1927
1928         has_live_chat_replay = False
1929         if not is_live:
1930             yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1931             try:
1932                 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1933                 has_live_chat_replay = True
1934             except (KeyError, IndexError, TypeError):
1935                 pass
1936
1937         # Check for "rental" videos
1938         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1939             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1940
1941         def _extract_filesize(media_url):
1942             return int_or_none(self._search_regex(
1943                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1944
1945         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1946         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1947
1948         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1949             self.report_rtmp_download()
1950             formats = [{
1951                 'format_id': '_rtmp',
1952                 'protocol': 'rtmp',
1953                 'url': video_info['conn'][0],
1954                 'player_url': player_url,
1955             }]
1956         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1957             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1958             if 'rtmpe%3Dyes' in encoded_url_map:
1959                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1960             formats = []
1961             formats_spec = {}
1962             fmt_list = video_info.get('fmt_list', [''])[0]
1963             if fmt_list:
1964                 for fmt in fmt_list.split(','):
1965                     spec = fmt.split('/')
1966                     if len(spec) > 1:
1967                         width_height = spec[1].split('x')
1968                         if len(width_height) == 2:
1969                             formats_spec[spec[0]] = {
1970                                 'resolution': spec[1],
1971                                 'width': int_or_none(width_height[0]),
1972                                 'height': int_or_none(width_height[1]),
1973                             }
1974             for fmt in streaming_formats:
1975                 itag = str_or_none(fmt.get('itag'))
1976                 if not itag:
1977                     continue
1978                 quality = fmt.get('quality')
1979                 quality_label = fmt.get('qualityLabel') or quality
1980                 formats_spec[itag] = {
1981                     'asr': int_or_none(fmt.get('audioSampleRate')),
1982                     'filesize': int_or_none(fmt.get('contentLength')),
1983                     'format_note': quality_label,
1984                     'fps': int_or_none(fmt.get('fps')),
1985                     'height': int_or_none(fmt.get('height')),
1986                     # bitrate for itag 43 is always 2147483647
1987                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1988                     'width': int_or_none(fmt.get('width')),
1989                 }
1990
1991             for fmt in streaming_formats:
1992                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1993                     continue
1994                 url = url_or_none(fmt.get('url'))
1995
1996                 if not url:
1997                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1998                     if not cipher:
1999                         continue
2000                     url_data = compat_parse_qs(cipher)
2001                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2002                     if not url:
2003                         continue
2004                 else:
2005                     cipher = None
2006                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2007
2008                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2009                 # Unsupported FORMAT_STREAM_TYPE_OTF
2010                 if stream_type == 3:
2011                     continue
2012
2013                 format_id = fmt.get('itag') or url_data['itag'][0]
2014                 if not format_id:
2015                     continue
2016                 format_id = compat_str(format_id)
2017
2018                 if cipher:
2019                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2020                         ASSETS_RE = (
2021                             r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2022                             r'"jsUrl"\s*:\s*("[^"]+")',
2023                             r'"assets":.+?"js":\s*("[^"]+")')
2024                         jsplayer_url_json = self._search_regex(
2025                             ASSETS_RE,
2026                             embed_webpage if age_gate else video_webpage,
2027                             'JS player URL (1)', default=None)
2028                         if not jsplayer_url_json and not age_gate:
2029                             # We need the embed website after all
2030                             if embed_webpage is None:
2031                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2032                                 embed_webpage = self._download_webpage(
2033                                     embed_url, video_id, 'Downloading embed webpage')
2034                             jsplayer_url_json = self._search_regex(
2035                                 ASSETS_RE, embed_webpage, 'JS player URL')
2036
2037                         player_url = json.loads(jsplayer_url_json)
2038                         if player_url is None:
2039                             player_url_json = self._search_regex(
2040                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2041                                 video_webpage, 'age gate player URL')
2042                             player_url = json.loads(player_url_json)
2043
2044                     if 'sig' in url_data:
2045                         url += '&signature=' + url_data['sig'][0]
2046                     elif 's' in url_data:
2047                         encrypted_sig = url_data['s'][0]
2048
2049                         if self._downloader.params.get('verbose'):
2050                             if player_url is None:
2051                                 player_desc = 'unknown'
2052                             else:
2053                                 player_type, player_version = self._extract_player_info(player_url)
2054                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2055                             parts_sizes = self._signature_cache_id(encrypted_sig)
2056                             self.to_screen('{%s} signature length %s, %s' %
2057                                            (format_id, parts_sizes, player_desc))
2058
2059                         signature = self._decrypt_signature(
2060                             encrypted_sig, video_id, player_url, age_gate)
2061                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2062                         url += '&%s=%s' % (sp, signature)
2063                 if 'ratebypass' not in url:
2064                     url += '&ratebypass=yes'
2065
2066                 dct = {
2067                     'format_id': format_id,
2068                     'url': url,
2069                     'player_url': player_url,
2070                 }
2071                 if format_id in self._formats:
2072                     dct.update(self._formats[format_id])
2073                 if format_id in formats_spec:
2074                     dct.update(formats_spec[format_id])
2075
2076                 # Some itags are not included in DASH manifest thus corresponding formats will
2077                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2078                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2079                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2080                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2081
2082                 if width is None:
2083                     width = int_or_none(fmt.get('width'))
2084                 if height is None:
2085                     height = int_or_none(fmt.get('height'))
2086
2087                 filesize = int_or_none(url_data.get(
2088                     'clen', [None])[0]) or _extract_filesize(url)
2089
2090                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2091                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2092
2093                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2094                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2095                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2096
2097                 more_fields = {
2098                     'filesize': filesize,
2099                     'tbr': tbr,
2100                     'width': width,
2101                     'height': height,
2102                     'fps': fps,
2103                     'format_note': quality_label or quality,
2104                 }
2105                 for key, value in more_fields.items():
2106                     if value:
2107                         dct[key] = value
2108                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2109                 if type_:
2110                     type_split = type_.split(';')
2111                     kind_ext = type_split[0].split('/')
2112                     if len(kind_ext) == 2:
2113                         kind, _ = kind_ext
2114                         dct['ext'] = mimetype2ext(type_split[0])
2115                         if kind in ('audio', 'video'):
2116                             codecs = None
2117                             for mobj in re.finditer(
2118                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2119                                 if mobj.group('key') == 'codecs':
2120                                     codecs = mobj.group('val')
2121                                     break
2122                             if codecs:
2123                                 dct.update(parse_codecs(codecs))
2124                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2125                     dct['downloader_options'] = {
2126                         # Youtube throttles chunks >~10M
2127                         'http_chunk_size': 10485760,
2128                     }
2129                 formats.append(dct)
2130         else:
2131             manifest_url = (
2132                 url_or_none(try_get(
2133                     player_response,
2134                     lambda x: x['streamingData']['hlsManifestUrl'],
2135                     compat_str))
2136                 or url_or_none(try_get(
2137                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2138             if manifest_url:
2139                 formats = []
2140                 m3u8_formats = self._extract_m3u8_formats(
2141                     manifest_url, video_id, 'mp4', fatal=False)
2142                 for a_format in m3u8_formats:
2143                     itag = self._search_regex(
2144                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2145                     if itag:
2146                         a_format['format_id'] = itag
2147                         if itag in self._formats:
2148                             dct = self._formats[itag].copy()
2149                             dct.update(a_format)
2150                             a_format = dct
2151                     a_format['player_url'] = player_url
2152                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2153                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2154                     if self._downloader.params.get('youtube_include_hls_manifest', True):
2155                         formats.append(a_format)
2156             else:
2157                 error_message = extract_unavailable_message()
2158                 if not error_message:
2159                     reason_list = try_get(
2160                         player_response,
2161                         lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2162                         list) or []
2163                     for reason in reason_list:
2164                         if not isinstance(reason, dict):
2165                             continue
2166                         reason_text = try_get(reason, lambda x: x['text'], compat_str)
2167                         if reason_text:
2168                             if not error_message:
2169                                 error_message = ''
2170                             error_message += reason_text
2171                     if error_message:
2172                         error_message = clean_html(error_message)
2173                 if not error_message:
2174                     error_message = clean_html(try_get(
2175                         player_response, lambda x: x['playabilityStatus']['reason'],
2176                         compat_str))
2177                 if not error_message:
2178                     error_message = clean_html(
2179                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2180                 if error_message:
2181                     raise ExtractorError(error_message, expected=True)
2182                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2183
2184         # uploader
2185         video_uploader = try_get(
2186             video_info, lambda x: x['author'][0],
2187             compat_str) or str_or_none(video_details.get('author'))
2188         if video_uploader:
2189             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2190         else:
2191             self._downloader.report_warning('unable to extract uploader name')
2192
2193         # uploader_id
2194         video_uploader_id = None
2195         video_uploader_url = None
2196         mobj = re.search(
2197             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2198             video_webpage)
2199         if mobj is not None:
2200             video_uploader_id = mobj.group('uploader_id')
2201             video_uploader_url = mobj.group('uploader_url')
2202         else:
2203             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2204             if owner_profile_url:
2205                 video_uploader_id = self._search_regex(
2206                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2207                     default=None)
2208                 video_uploader_url = owner_profile_url
2209
2210         channel_id = (
2211             str_or_none(video_details.get('channelId'))
2212             or self._html_search_meta(
2213                 'channelId', video_webpage, 'channel id', default=None)
2214             or self._search_regex(
2215                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2216                 video_webpage, 'channel id', default=None, group='id'))
2217         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2218
2219         thumbnails = []
2220         thumbnails_list = try_get(
2221             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2222         for t in thumbnails_list:
2223             if not isinstance(t, dict):
2224                 continue
2225             thumbnail_url = url_or_none(t.get('url'))
2226             if not thumbnail_url:
2227                 continue
2228             thumbnails.append({
2229                 'url': thumbnail_url,
2230                 'width': int_or_none(t.get('width')),
2231                 'height': int_or_none(t.get('height')),
2232             })
2233
2234         if not thumbnails:
2235             video_thumbnail = None
2236             # We try first to get a high quality image:
2237             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2238                                 video_webpage, re.DOTALL)
2239             if m_thumb is not None:
2240                 video_thumbnail = m_thumb.group(1)
2241             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2242             if thumbnail_url:
2243                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2244             if video_thumbnail:
2245                 thumbnails.append({'url': video_thumbnail})
2246
2247         # upload date
2248         upload_date = self._html_search_meta(
2249             'datePublished', video_webpage, 'upload date', default=None)
2250         if not upload_date:
2251             upload_date = self._search_regex(
2252                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2253                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2254                 video_webpage, 'upload date', default=None)
2255         if not upload_date:
2256             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2257         upload_date = unified_strdate(upload_date)
2258
2259         video_license = self._html_search_regex(
2260             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2261             video_webpage, 'license', default=None)
2262
2263         m_music = re.search(
2264             r'''(?x)
2265                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2266                 <ul[^>]*>\s*
2267                 <li>(?P<title>.+?)
2268                 by (?P<creator>.+?)
2269                 (?:
2270                     \(.+?\)|
2271                     <a[^>]*
2272                         (?:
2273                             \bhref=["\']/red[^>]*>|             # drop possible
2274                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2275                         )
2276                     .*?
2277                 )?</li
2278             ''',
2279             video_webpage)
2280         if m_music:
2281             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2282             video_creator = clean_html(m_music.group('creator'))
2283         else:
2284             video_alt_title = video_creator = None
2285
2286         def extract_meta(field):
2287             return self._html_search_regex(
2288                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2289                 video_webpage, field, default=None)
2290
2291         track = extract_meta('Song')
2292         artist = extract_meta('Artist')
2293         album = extract_meta('Album')
2294
2295         # Youtube Music Auto-generated description
2296         release_date = release_year = None
2297         if video_description:
2298             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2299             if mobj:
2300                 if not track:
2301                     track = mobj.group('track').strip()
2302                 if not artist:
2303                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2304                 if not album:
2305                     album = mobj.group('album'.strip())
2306                 release_year = mobj.group('release_year')
2307                 release_date = mobj.group('release_date')
2308                 if release_date:
2309                     release_date = release_date.replace('-', '')
2310                     if not release_year:
2311                         release_year = int(release_date[:4])
2312                 if release_year:
2313                     release_year = int(release_year)
2314
2315         yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2316         if yt_initial:
2317             music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2318             if len(music_metadata):
2319                 album = music_metadata[0].get('album')
2320                 artist = music_metadata[0].get('artist')
2321                 track = music_metadata[0].get('track')
2322
2323         m_episode = re.search(
2324             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2325             video_webpage)
2326         if m_episode:
2327             series = unescapeHTML(m_episode.group('series'))
2328             season_number = int(m_episode.group('season'))
2329             episode_number = int(m_episode.group('episode'))
2330         else:
2331             series = season_number = episode_number = None
2332
2333         m_cat_container = self._search_regex(
2334             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2335             video_webpage, 'categories', default=None)
2336         category = None
2337         if m_cat_container:
2338             category = self._html_search_regex(
2339                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2340                 default=None)
2341         if not category:
2342             category = try_get(
2343                 microformat, lambda x: x['category'], compat_str)
2344         video_categories = None if category is None else [category]
2345
2346         video_tags = [
2347             unescapeHTML(m.group('content'))
2348             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2349         if not video_tags:
2350             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2351
2352         def _extract_count(count_name):
2353             return str_to_int(self._search_regex(
2354                 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2355                  r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2356                 video_webpage, count_name, default=None))
2357
2358         like_count = _extract_count('like')
2359         dislike_count = _extract_count('dislike')
2360
2361         if view_count is None:
2362             view_count = str_to_int(self._search_regex(
2363                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2364                 'view count', default=None))
2365
2366         average_rating = (
2367             float_or_none(video_details.get('averageRating'))
2368             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2369
2370         # subtitles
2371         video_subtitles = self.extract_subtitles(
2372             video_id, video_webpage, has_live_chat_replay)
2373         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2374
2375         video_duration = try_get(
2376             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2377         if not video_duration:
2378             video_duration = int_or_none(video_details.get('lengthSeconds'))
2379         if not video_duration:
2380             video_duration = parse_duration(self._html_search_meta(
2381                 'duration', video_webpage, 'video duration'))
2382
2383         # Get Subscriber Count of channel
2384         subscriber_count = parse_count(self._search_regex(
2385             r'"text":"([\d\.]+\w?) subscribers"',
2386             video_webpage,
2387             'subscriber count',
2388             default=None
2389         ))
2390
2391         # annotations
2392         video_annotations = None
2393         if self._downloader.params.get('writeannotations', False):
2394             xsrf_token = self._search_regex(
2395                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2396                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2397             invideo_url = try_get(
2398                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2399             if xsrf_token and invideo_url:
2400                 xsrf_field_name = self._search_regex(
2401                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2402                     video_webpage, 'xsrf field name',
2403                     group='xsrf_field_name', default='session_token')
2404                 video_annotations = self._download_webpage(
2405                     self._proto_relative_url(invideo_url),
2406                     video_id, note='Downloading annotations',
2407                     errnote='Unable to download video annotations', fatal=False,
2408                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2409
2410         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2411
2412         # Look for the DASH manifest
2413         if self._downloader.params.get('youtube_include_dash_manifest', True):
2414             dash_mpd_fatal = True
2415             for mpd_url in dash_mpds:
2416                 dash_formats = {}
2417                 try:
2418                     def decrypt_sig(mobj):
2419                         s = mobj.group(1)
2420                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2421                         return '/signature/%s' % dec_s
2422
2423                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2424
2425                     for df in self._extract_mpd_formats(
2426                             mpd_url, video_id, fatal=dash_mpd_fatal,
2427                             formats_dict=self._formats):
2428                         if not df.get('filesize'):
2429                             df['filesize'] = _extract_filesize(df['url'])
2430                         # Do not overwrite DASH format found in some previous DASH manifest
2431                         if df['format_id'] not in dash_formats:
2432                             dash_formats[df['format_id']] = df
2433                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2434                         # allow them to fail without bug report message if we already have
2435                         # some DASH manifest succeeded. This is temporary workaround to reduce
2436                         # burst of bug reports until we figure out the reason and whether it
2437                         # can be fixed at all.
2438                         dash_mpd_fatal = False
2439                 except (ExtractorError, KeyError) as e:
2440                     self.report_warning(
2441                         'Skipping DASH manifest: %r' % e, video_id)
2442                 if dash_formats:
2443                     # Remove the formats we found through non-DASH, they
2444                     # contain less info and it can be wrong, because we use
2445                     # fixed values (for example the resolution). See
2446                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2447                     # example.
2448                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2449                     formats.extend(dash_formats.values())
2450
2451         # Check for malformed aspect ratio
2452         stretched_m = re.search(
2453             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2454             video_webpage)
2455         if stretched_m:
2456             w = float(stretched_m.group('w'))
2457             h = float(stretched_m.group('h'))
2458             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2459             # We will only process correct ratios.
2460             if w > 0 and h > 0:
2461                 ratio = w / h
2462                 for f in formats:
2463                     if f.get('vcodec') != 'none':
2464                         f['stretched_ratio'] = ratio
2465
2466         if not formats:
2467             if 'reason' in video_info:
2468                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2469                     regions_allowed = self._html_search_meta(
2470                         'regionsAllowed', video_webpage, default=None)
2471                     countries = regions_allowed.split(',') if regions_allowed else None
2472                     self.raise_geo_restricted(
2473                         msg=video_info['reason'][0], countries=countries)
2474                 reason = video_info['reason'][0]
2475                 if 'Invalid parameters' in reason:
2476                     unavailable_message = extract_unavailable_message()
2477                     if unavailable_message:
2478                         reason = unavailable_message
2479                 raise ExtractorError(
2480                     'YouTube said: %s' % reason,
2481                     expected=True, video_id=video_id)
2482             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2483                 raise ExtractorError('This video is DRM protected.', expected=True)
2484
2485         self._sort_formats(formats)
2486
2487         self.mark_watched(video_id, video_info, player_response)
2488
2489         return {
2490             'id': video_id,
2491             'uploader': video_uploader,
2492             'uploader_id': video_uploader_id,
2493             'uploader_url': video_uploader_url,
2494             'channel_id': channel_id,
2495             'channel_url': channel_url,
2496             'upload_date': upload_date,
2497             'license': video_license,
2498             'creator': video_creator or artist,
2499             'title': video_title,
2500             'alt_title': video_alt_title or track,
2501             'thumbnails': thumbnails,
2502             'description': video_description,
2503             'categories': video_categories,
2504             'tags': video_tags,
2505             'subtitles': video_subtitles,
2506             'automatic_captions': automatic_captions,
2507             'duration': video_duration,
2508             'age_limit': 18 if age_gate else 0,
2509             'annotations': video_annotations,
2510             'chapters': chapters,
2511             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2512             'view_count': view_count,
2513             'like_count': like_count,
2514             'dislike_count': dislike_count,
2515             'average_rating': average_rating,
2516             'formats': formats,
2517             'is_live': is_live,
2518             'start_time': start_time,
2519             'end_time': end_time,
2520             'series': series,
2521             'season_number': season_number,
2522             'episode_number': episode_number,
2523             'track': track,
2524             'artist': artist,
2525             'album': album,
2526             'release_date': release_date,
2527             'release_year': release_year,
2528             'subscriber_count': subscriber_count,
2529         }
2530
2531
2532 class YoutubeTabIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com tab'
    # Verbose-mode pattern. It accepts http(s) URLs on youtube.com,
    # youtubekids.com or invidio.us (optionally behind one subdomain) whose
    # path is one of:
    #   * a ``channel/``, ``c/`` or ``user/`` page;
    #   * a ``playlist`` or ``watch`` URL carrying a ``list=`` query parameter
    #     (this alternative is flagged through the ``not_channel`` group);
    #   * a direct name that is not one of the reserved names interpolated
    #     from YoutubeBaseInfoExtractor._RESERVED_NAMES.
    # The ``id`` group captures everything up to the next ``/``, ``?``, ``#``
    # or ``&``.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?:channel|c|user)/|
                            (?P<not_channel>
                                (?:playlist|watch)\?.*?\blist=
                            )|
                            (?!(%s)([/#?]|$))  # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
    IE_NAME = 'youtube:tab'
2551
2552     _TESTS = [{
2553         # playlists, multipage
2554         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2555         'playlist_mincount': 94,
2556         'info_dict': {
2557             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2558             'title': 'Игорь Клейнер - Playlists',
2559             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2560         },
2561     }, {
2562         # playlists, multipage, different order
2563         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2564         'playlist_mincount': 94,
2565         'info_dict': {
2566             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2567             'title': 'Игорь Клейнер - Playlists',
2568             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2569         },
2570     }, {
2571         # playlists, singlepage
2572         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2573         'playlist_mincount': 4,
2574         'info_dict': {
2575             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2576             'title': 'ThirstForScience - Playlists',
2577             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2578         }
2579     }, {
2580         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2581         'only_matching': True,
2582     }, {
2583         # basic, single video playlist
2584         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2585         'info_dict': {
2586             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2587             'uploader': 'Sergey M.',
2588             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2589             'title': 'youtube-dl public playlist',
2590         },
2591         'playlist_count': 1,
2592     }, {
2593         # empty playlist
2594         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2595         'info_dict': {
2596             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2597             'uploader': 'Sergey M.',
2598             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2599             'title': 'youtube-dl empty playlist',
2600         },
2601         'playlist_count': 0,
2602     }, {
2603         # Home tab
2604         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2605         'info_dict': {
2606             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2607             'title': 'lex will - Home',
2608             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2609         },
2610         'playlist_mincount': 2,
2611     }, {
2612         # Videos tab
2613         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2614         'info_dict': {
2615             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2616             'title': 'lex will - Videos',
2617             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2618         },
2619         'playlist_mincount': 975,
2620     }, {
2621         # Videos tab, sorted by popular
2622         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2623         'info_dict': {
2624             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2625             'title': 'lex will - Videos',
2626             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2627         },
2628         'playlist_mincount': 199,
2629     }, {
2630         # Playlists tab
2631         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2632         'info_dict': {
2633             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2634             'title': 'lex will - Playlists',
2635             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2636         },
2637         'playlist_mincount': 17,
2638     }, {
2639         # Community tab
2640         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2641         'info_dict': {
2642             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2643             'title': 'lex will - Community',
2644             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2645         },
2646         'playlist_mincount': 18,
2647     }, {
2648         # Channels tab
2649         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2650         'info_dict': {
2651             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2652             'title': 'lex will - Channels',
2653             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2654         },
2655         'playlist_mincount': 138,
2656     }, {
2657         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2658         'only_matching': True,
2659     }, {
2660         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2661         'only_matching': True,
2662     }, {
2663         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2664         'only_matching': True,
2665     }, {
2666         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2667         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2668         'info_dict': {
2669             'title': '29C3: Not my department',
2670             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2671             'uploader': 'Christiaan008',
2672             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2673         },
2674         'playlist_count': 96,
2675     }, {
2676         'note': 'Large playlist',
2677         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2678         'info_dict': {
2679             'title': 'Uploads from Cauchemar',
2680             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2681             'uploader': 'Cauchemar',
2682             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2683         },
2684         'playlist_mincount': 1123,
2685     }, {
2686         # even larger playlist, 8832 videos
2687         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2688         'only_matching': True,
2689     }, {
2690         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2691         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2692         'info_dict': {
2693             'title': 'Uploads from Interstellar Movie',
2694             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2695             'uploader': 'Interstellar Movie',
2696             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2697         },
2698         'playlist_mincount': 21,
2699     }, {
2700         # https://github.com/ytdl-org/youtube-dl/issues/21844
2701         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2702         'info_dict': {
2703             'title': 'Data Analysis with Dr Mike Pound',
2704             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2705             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2706             'uploader': 'Computerphile',
2707         },
2708         'playlist_mincount': 11,
2709     }, {
2710         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2711         'only_matching': True,
2712     }, {
2713         # Playlist URL that does not actually serve a playlist
2714         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2715         'info_dict': {
2716             'id': 'FqZTN594JQw',
2717             'ext': 'webm',
2718             'title': "Smiley's People 01 detective, Adventure Series, Action",
2719             'uploader': 'STREEM',
2720             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2721             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2722             'upload_date': '20150526',
2723             'license': 'Standard YouTube License',
2724             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2725             'categories': ['People & Blogs'],
2726             'tags': list,
2727             'view_count': int,
2728             'like_count': int,
2729             'dislike_count': int,
2730         },
2731         'params': {
2732             'skip_download': True,
2733         },
2734         'skip': 'This video is not available.',
2735         'add_ie': [YoutubeIE.ie_key()],
2736     }, {
2737         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2738         'only_matching': True,
2739     }, {
2740         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2741         'only_matching': True,
2742     }, {
2743         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2744         'info_dict': {
2745             'id': '9Auq9mYxFEE',
2746             'ext': 'mp4',
2747             'title': 'Watch Sky News live',
2748             'uploader': 'Sky News',
2749             'uploader_id': 'skynews',
2750             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2751             'upload_date': '20191102',
2752             'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2753             'categories': ['News & Politics'],
2754             'tags': list,
2755             'like_count': int,
2756             'dislike_count': int,
2757         },
2758         'params': {
2759             'skip_download': True,
2760         },
2761     }, {
2762         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2763         'info_dict': {
2764             'id': 'a48o2S1cPoo',
2765             'ext': 'mp4',
2766             'title': 'The Young Turks - Live Main Show',
2767             'uploader': 'The Young Turks',
2768             'uploader_id': 'TheYoungTurks',
2769             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2770             'upload_date': '20150715',
2771             'license': 'Standard YouTube License',
2772             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2773             'categories': ['News & Politics'],
2774             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2775             'like_count': int,
2776             'dislike_count': int,
2777         },
2778         'params': {
2779             'skip_download': True,
2780         },
2781         'only_matching': True,
2782     }, {
2783         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2784         'only_matching': True,
2785     }, {
2786         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2787         'only_matching': True,
2788     },
2789         # TODO
2790         # {
2791         #     'url': 'https://www.youtube.com/TheYoungTurks/live',
2792         #     'only_matching': True,
2793         # }
2794     ]
2795
2796     def _extract_channel_id(self, webpage):
2797         channel_id = self._html_search_meta(
2798             'channelId', webpage, 'channel id', default=None)
2799         if channel_id:
2800             return channel_id
2801         channel_url = self._html_search_meta(
2802             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2803              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2804              'twitter:app:url:googleplay'), webpage, 'channel url')
2805         return self._search_regex(
2806             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2807             channel_url, 'channel id')
2808
2809     @staticmethod
2810     def _extract_grid_item_renderer(item):
2811         for item_kind in ('Playlist', 'Video', 'Channel'):
2812             renderer = item.get('grid%sRenderer' % item_kind)
2813             if renderer:
2814                 return renderer
2815
2816     def _extract_video(self, renderer):
2817         video_id = renderer.get('videoId')
2818         title = try_get(
2819             renderer,
2820             (lambda x: x['title']['runs'][0]['text'],
2821              lambda x: x['title']['simpleText']), compat_str)
2822         description = try_get(
2823             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2824             compat_str)
2825         duration = parse_duration(try_get(
2826             renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2827         view_count_text = try_get(
2828             renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2829         view_count = str_to_int(self._search_regex(
2830             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2831             'view count', default=None))
2832         uploader = try_get(
2833             renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2834         return {
2835             '_type': 'url_transparent',
2836             'ie_key': YoutubeIE.ie_key(),
2837             'id': video_id,
2838             'url': video_id,
2839             'title': title,
2840             'description': description,
2841             'duration': duration,
2842             'view_count': view_count,
2843             'uploader': uploader,
2844         }
2845
2846     def _grid_entries(self, grid_renderer):
2847         for item in grid_renderer['items']:
2848             if not isinstance(item, dict):
2849                 continue
2850             renderer = self._extract_grid_item_renderer(item)
2851             if not isinstance(renderer, dict):
2852                 continue
2853             title = try_get(
2854                 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2855             # playlist
2856             playlist_id = renderer.get('playlistId')
2857             if playlist_id:
2858                 yield self.url_result(
2859                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
2860                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2861                     video_title=title)
2862             # video
2863             video_id = renderer.get('videoId')
2864             if video_id:
2865                 yield self._extract_video(renderer)
2866             # channel
2867             channel_id = renderer.get('channelId')
2868             if channel_id:
2869                 title = try_get(
2870                     renderer, lambda x: x['title']['simpleText'], compat_str)
2871                 yield self.url_result(
2872                     'https://www.youtube.com/channel/%s' % channel_id,
2873                     ie=YoutubeTabIE.ie_key(), video_title=title)
2874
2875     def _shelf_entries_trimmed(self, shelf_renderer):
2876         renderer = try_get(
2877             shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
2878         if not renderer:
2879             return
2880         # TODO: add support for nested playlists so each shelf is processed
2881         # as separate playlist
2882         # TODO: this includes only first N items
2883         for entry in self._grid_entries(renderer):
2884             yield entry
2885
2886     def _shelf_entries(self, shelf_renderer):
2887         ep = try_get(
2888             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2889             compat_str)
2890         shelf_url = urljoin('https://www.youtube.com', ep)
2891         if not shelf_url:
2892             return
2893         title = try_get(
2894             shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2895         yield self.url_result(shelf_url, video_title=title)
2896
2897     def _playlist_entries(self, video_list_renderer):
2898         for content in video_list_renderer['contents']:
2899             if not isinstance(content, dict):
2900                 continue
2901             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2902             if not isinstance(renderer, dict):
2903                 continue
2904             video_id = renderer.get('videoId')
2905             if not video_id:
2906                 continue
2907             yield self._extract_video(renderer)
2908
2909     def _itemSection_entries(self, item_sect_renderer):
2910         for content in item_sect_renderer['contents']:
2911             if not isinstance(content, dict):
2912                 continue
2913             renderer = content.get('videoRenderer', {})
2914             if not isinstance(renderer, dict):
2915                 continue
2916             video_id = renderer.get('videoId')
2917             if not video_id:
2918                 continue
2919             yield self._extract_video(renderer)
2920
2921     def _rich_entries(self, rich_grid_renderer):
2922         renderer = try_get(
2923             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
2924         video_id = renderer.get('videoId')
2925         if not video_id:
2926             return
2927         yield self._extract_video(renderer)
2928
2929     def _video_entry(self, video_renderer):
2930         video_id = video_renderer.get('videoId')
2931         if video_id:
2932             return self._extract_video(video_renderer)
2933
2934     def _post_thread_entries(self, post_thread_renderer):
2935         post_renderer = try_get(
2936             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2937         if not post_renderer:
2938             return
2939         # video attachment
2940         video_renderer = try_get(
2941             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2942         video_id = None
2943         if video_renderer:
2944             entry = self._video_entry(video_renderer)
2945             if entry:
2946                 yield entry
2947         # inline video links
2948         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2949         for run in runs:
2950             if not isinstance(run, dict):
2951                 continue
2952             ep_url = try_get(
2953                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2954             if not ep_url:
2955                 continue
2956             if not YoutubeIE.suitable(ep_url):
2957                 continue
2958             ep_video_id = YoutubeIE._match_id(ep_url)
2959             if video_id == ep_video_id:
2960                 continue
2961             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2962
2963     def _post_thread_continuation_entries(self, post_thread_continuation):
2964         contents = post_thread_continuation.get('contents')
2965         if not isinstance(contents, list):
2966             return
2967         for content in contents:
2968             renderer = content.get('backstagePostThreadRenderer')
2969             if not isinstance(renderer, dict):
2970                 continue
2971             for entry in self._post_thread_entries(renderer):
2972                 yield entry
2973
2974     @staticmethod
2975     def _extract_next_continuation_data(renderer):
2976         next_continuation = try_get(
2977             renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2978         if not next_continuation:
2979             return
2980         continuation = next_continuation.get('continuation')
2981         if not continuation:
2982             return
2983         ctp = next_continuation.get('clickTrackingParams')
2984         return {
2985             'ctoken': continuation,
2986             'continuation': continuation,
2987             'itct': ctp,
2988         }
2989
2990     @classmethod
2991     def _extract_continuation(cls, renderer):
2992         next_continuation = cls._extract_next_continuation_data(renderer)
2993         if next_continuation:
2994             return next_continuation
2995         contents = renderer.get('contents')
2996         if not isinstance(contents, list):
2997             return
2998         for content in contents:
2999             if not isinstance(content, dict):
3000                 continue
3001             continuation_ep = try_get(
3002                 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3003                 dict)
3004             if not continuation_ep:
3005                 continue
3006             continuation = try_get(
3007                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3008             if not continuation:
3009                 continue
3010             ctp = continuation_ep.get('clickTrackingParams')
3011             if not ctp:
3012                 continue
3013             return {
3014                 'ctoken': continuation,
3015                 'continuation': continuation,
3016                 'itct': ctp,
3017             }
3018
3019     def _entries(self, tab, identity_token):
3020
3021         def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
3022             contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3023             for content in contents:
3024                 if not isinstance(content, dict):
3025                     continue
3026                 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3027                 if not is_renderer:
3028                     renderer = content.get('richItemRenderer')
3029                     if renderer:
3030                         for entry in self._rich_entries(renderer):
3031                             yield entry
3032                         continuation_list[0] = self._extract_continuation(parent_renderer)
3033                     continue
3034                 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3035                 for isr_content in isr_contents:
3036                     if not isinstance(isr_content, dict):
3037                         continue
3038                     renderer = isr_content.get('playlistVideoListRenderer')
3039                     if renderer:
3040                         for entry in self._playlist_entries(renderer):
3041                             yield entry
3042                         continuation_list[0] = self._extract_continuation(renderer)
3043                         continue
3044                     renderer = isr_content.get('gridRenderer')
3045                     if renderer:
3046                         for entry in self._grid_entries(renderer):
3047                             yield entry
3048                         continuation_list[0] = self._extract_continuation(renderer)
3049                         continue
3050                     renderer = isr_content.get('shelfRenderer')
3051                     if renderer:
3052                         for entry in self._shelf_entries(renderer):
3053                             yield entry
3054                         continue
3055                     renderer = isr_content.get('backstagePostThreadRenderer')
3056                     if renderer:
3057                         for entry in self._post_thread_entries(renderer):
3058                             yield entry
3059                         continuation_list[0] = self._extract_continuation(renderer)
3060                         continue
3061                     renderer = isr_content.get('videoRenderer')
3062                     if renderer:
3063                         entry = self._video_entry(renderer)
3064                         if entry:
3065                             yield entry
3066
3067                 if not continuation_list[0]:
3068                     continuation_list[0] = self._extract_continuation(is_renderer)
3069
3070             if not continuation_list[0]:
3071                 continuation_list[0] = self._extract_continuation(parent_renderer)
3072
3073         continuation_list = [None]  # Python 2 doesnot support nonlocal
3074         parent_renderer = (
3075             try_get(tab, lambda x: x['sectionListRenderer'], dict)
3076             or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
3077         for entry in extract_entries(parent_renderer):
3078             yield entry
3079         continuation = continuation_list[0]
3080
3081         headers = {
3082             'x-youtube-client-name': '1',
3083             'x-youtube-client-version': '2.20201112.04.01',
3084         }
3085         if identity_token:
3086             headers['x-youtube-identity-token'] = identity_token
3087
3088         for page_num in itertools.count(1):
3089             if not continuation:
3090                 break
3091             browse = self._download_json(
3092                 'https://www.youtube.com/browse_ajax', None,
3093                 'Downloading page %d' % page_num,
3094                 headers=headers, query=continuation, fatal=False)
3095             if not browse:
3096                 break
3097             response = try_get(browse, lambda x: x[1]['response'], dict)
3098             if not response:
3099                 break
3100
3101             continuation_contents = try_get(
3102                 response, lambda x: x['continuationContents'], dict)
3103             if continuation_contents:
3104                 continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
3105                 if continuation_renderer:
3106                     for entry in self._playlist_entries(continuation_renderer):
3107                         yield entry
3108                     continuation = self._extract_continuation(continuation_renderer)
3109                     continue
3110                 continuation_renderer = continuation_contents.get('gridContinuation')
3111                 if continuation_renderer:
3112                     for entry in self._grid_entries(continuation_renderer):
3113                         yield entry
3114                     continuation = self._extract_continuation(continuation_renderer)
3115                     continue
3116                 continuation_renderer = continuation_contents.get('itemSectionContinuation')
3117                 if continuation_renderer:
3118                     for entry in self._post_thread_continuation_entries(continuation_renderer):
3119                         yield entry
3120                     continuation = self._extract_continuation(continuation_renderer)
3121                     continue
3122                 continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
3123                 if continuation_renderer:
3124                     continuation_list = [None]
3125                     for entry in extract_entries(continuation_renderer):
3126                         yield entry
3127                     continuation = continuation_list[0]
3128                     continue
3129
3130             continuation_items = try_get(
3131                 response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
3132             if continuation_items:
3133                 continuation_item = continuation_items[0]
3134                 if not isinstance(continuation_item, dict):
3135                     continue
3136                 renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
3137                 if renderer:
3138                     video_list_renderer = {'contents': continuation_items}
3139                     for entry in self._playlist_entries(video_list_renderer):
3140                         yield entry
3141                     continuation = self._extract_continuation(video_list_renderer)
3142                     continue
3143             break
3144
3145     @staticmethod
3146     def _extract_selected_tab(tabs):
3147         for tab in tabs:
3148             if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3149                 return tab['tabRenderer']
3150         else:
3151             raise ExtractorError('Unable to find selected tab')
3152
3153     @staticmethod
3154     def _extract_uploader(data):
3155         uploader = {}
3156         sidebar_renderer = try_get(
3157             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3158         if sidebar_renderer:
3159             for item in sidebar_renderer:
3160                 if not isinstance(item, dict):
3161                     continue
3162                 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3163                 if not isinstance(renderer, dict):
3164                     continue
3165                 owner = try_get(
3166                     renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3167                 if owner:
3168                     uploader['uploader'] = owner.get('text')
3169                     uploader['uploader_id'] = try_get(
3170                         owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3171                     uploader['uploader_url'] = urljoin(
3172                         'https://www.youtube.com/',
3173                         try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3174         return uploader
3175
3176     def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3177         selected_tab = self._extract_selected_tab(tabs)
3178         renderer = try_get(
3179             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3180         playlist_id = title = description = None
3181         if renderer:
3182             channel_title = renderer.get('title') or item_id
3183             tab_title = selected_tab.get('title')
3184             title = channel_title or item_id
3185             if tab_title:
3186                 title += ' - %s' % tab_title
3187             description = renderer.get('description')
3188             playlist_id = renderer.get('externalId')
3189         renderer = try_get(
3190             data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3191         if renderer:
3192             title = renderer.get('title')
3193             description = None
3194             playlist_id = item_id
3195         if playlist_id is None:
3196             playlist_id = item_id
3197         if title is None:
3198             title = "Youtube " + playlist_id.title()
3199         playlist = self.playlist_result(
3200             self._entries(selected_tab['content'], identity_token),
3201             playlist_id=playlist_id, playlist_title=title,
3202             playlist_description=description)
3203         playlist.update(self._extract_uploader(data))
3204         return playlist
3205
3206     def _extract_from_playlist(self, item_id, data, playlist):
3207         title = playlist.get('title') or try_get(
3208             data, lambda x: x['titleText']['simpleText'], compat_str)
3209         playlist_id = playlist.get('playlistId') or item_id
3210         return self.playlist_result(
3211             self._playlist_entries(playlist), playlist_id=playlist_id,
3212             playlist_title=title)
3213
3214     def _real_extract(self, url):
3215         item_id = self._match_id(url)
3216         url = compat_urlparse.urlunparse(
3217             compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
3218         is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
3219         if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
3220             self._downloader.report_warning(
3221                 'A channel/user page was given. All the channel\'s videos will be downloaded. '
3222                 'To download only the videos in the home page, add a "/home" to the URL')
3223             url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')
3224
3225         # Handle both video/playlist URLs
3226         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
3227         video_id = qs.get('v', [None])[0]
3228         playlist_id = qs.get('list', [None])[0]
3229         if video_id and playlist_id:
3230             if self._downloader.params.get('noplaylist'):
3231                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3232                 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
3233             self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
3234         webpage = self._download_webpage(url, item_id)
3235         identity_token = self._search_regex(
3236             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3237             'identity token', default=None)
3238         data = self._extract_yt_initial_data(item_id, webpage)
3239         tabs = try_get(
3240             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
3241         if tabs:
3242             return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
3243         playlist = try_get(
3244             data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3245         if playlist:
3246             return self._extract_from_playlist(item_id, data, playlist)
3247         # Fallback to video extraction if no playlist alike page is recognized.
3248         # First check for the current video then try the v attribute of URL query.
3249         video_id = try_get(
3250             data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
3251             compat_str) or video_id
3252         if video_id:
3253             return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
3254         # Failed to recognize
3255         raise ExtractorError('Unable to recognize tab page')
3256
3257
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist ids and ``list=`` URLs and forward them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    # Either a bare playlist id or a supported host URL carrying "list=<id>".
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us|
                                youtu\.be
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline every URL that YoutubeTabIE already accepts."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Re-target the request at the /playlist page handled by YoutubeTabIE.

        The query of the incoming URL is carried over unchanged; when there is
        none (for instance a bare id), ``list=<playlist id>`` is used instead.
        """
        playlist_id = self._match_id(url)
        incoming_query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        query = incoming_query or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3356
3357
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shortcut to a /user/ URL."""

    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the matched user name over to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3370
3371
class YoutubeFavouritesIE(InfoExtractor):
    """Resolve the ``:ytfav`` family of shortcuts to the ``list=LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3389
3390
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor backed by the ``youtubei/v1/search`` JSON endpoint."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the "params" field of the search request.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield ``url_transparent`` entries for the videos matching ``query``.

        Result pages are requested one after another, following the
        continuation token of each page, until ``n`` entries were yielded, a
        page cannot be downloaded, a page has no result list, or no
        continuation token is left.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page nests the list in the two-column renderer,
            # continuation pages deliver it through an append action.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            # Walk every section instead of only the first one, so results are
            # not lost (and paging does not stop) when the videos are not in
            # the leading section.
            for slr_content in slr_contents:
                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue
                    title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                    description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                    duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                    view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                    # Keep the thousands separators in the match; stopping at
                    # the first non-digit would turn "1,234,567 views" into 1.
                    view_count = str_to_int(self._search_regex(
                        r'^(\d[\d,.]*)', re.sub(r'\s', '', view_count_text),
                        'view count', default=None))
                    uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                    total += 1
                    yield {
                        '_type': 'url_transparent',
                        'ie_key': YoutubeIE.ie_key(),
                        'id': video_id,
                        'url': video_id,
                        'title': title,
                        'description': description,
                        'duration': duration,
                        'view_count': view_count,
                        'uploader': uploader,
                    }
                    if total >= n:
                        return
            # The continuation item is looked up anywhere in the list rather
            # than at one hard-coded index.
            token = None
            for slr_content in slr_contents:
                token = try_get(
                    slr_content,
                    lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                    compat_str)
                if token:
                    break
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3479
3480
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE with its own search key and request params."""

    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the "params" field of the search request by the parent class.
    _SEARCH_PARAMS = 'CAI%3D'
3486
3487
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a youtube.com/results URL."""

    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` pattern."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the ``sp`` parameter from ``url`` and search.

        The terms come from ``search_query`` or, failing that, ``q``.  The
        ``sp`` value (empty string when absent) is stored on the instance as
        ``_SEARCH_PARAMS`` before the results are requested.
        """
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query')
        if not terms:
            terms = params.get('q')
        query = terms[0]
        sp_values = params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3513
3514
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.

    Concrete subclasses have to provide the _FEED_NAME and _PLAYLIST_TITLE
    attributes, which this class reads.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name built from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run the login step."""
        self._login()

    def _shelf_entries(self, shelf_renderer):
        """Yield the grid entries of a shelf; yield nothing without a grid."""
        grid = try_get(shelf_renderer, lambda x: x['content']['gridRenderer'], dict)
        if grid:
            for entry in self._grid_entries(grid):
                yield entry

    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Return the selected tab's entries under the feed's fixed title.

        ``item_id``, ``webpage`` and ``data`` are accepted but not used here.
        """
        selected_tab = self._extract_selected_tab(tabs)
        entries = self._entries(selected_tab['content'], identity_token)
        return self.playlist_result(
            entries, playlist_title=self._PLAYLIST_TITLE)

    def _real_extract(self, url):
        """Download the feed page of ``_FEED_NAME`` and extract its tabs.

        The incoming ``url`` is replaced by the feed URL.  Raises
        ExtractorError when the page carries no tab list.
        """
        feed_name = self._FEED_NAME
        feed_url = 'https://www.youtube.com/feed/%s' % feed_name
        webpage = self._download_webpage(feed_url, feed_name)
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(feed_name, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if not tabs:
            # Failed to recognize
            raise ExtractorError('Unable to recognize feed page')
        return self._extract_from_tabs(feed_name, webpage, data, tabs, identity_token)
3558
3559
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the watch-later inputs to the ``list=WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    # Three alternatives: the feed URL, the ":ytwatchlater" shortcut, or "WL".
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater|WL'
    _TESTS = [{
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3576
3577
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``recommended`` feed."""

    # Values read by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
3583
3584
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``subscriptions`` feed."""

    # Values read by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'

    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
3590
3591
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``history`` feed."""

    # Values read by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3597
3598
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs without a video id and explain the cause."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at shell quoting."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
3646
3647
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
3663
3664
# Do YouTube show URLs even exist anymore? I couldn't find any.
3666 r'''
3667 class YoutubeShowIE(YoutubeTabIE):
3668     IE_DESC = 'YouTube.com (multi-season) shows'
3669     _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3670     IE_NAME = 'youtube:show'
3671     _TESTS = [{
3672         'url': 'https://www.youtube.com/show/airdisasters',
3673         'playlist_mincount': 5,
3674         'info_dict': {
3675             'id': 'airdisasters',
3676             'title': 'Air Disasters',
3677         }
3678     }]
3679
3680     def _real_extract(self, url):
3681         playlist_id = self._match_id(url)
3682         return super(YoutubeShowIE, self)._real_extract(
3683             'https://www.youtube.com/show/%s/playlists' % playlist_id)
3684 '''