]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/youtube.py
Update to release 2020.11.24 except youtube and skyit extractors
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_kwargs,
20 compat_parse_qs,
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
25 compat_urlparse,
26 compat_str,
27 )
28 from ..utils import (
29 bool_or_none,
30 clean_html,
31 error_to_compat_str,
32 ExtractorError,
33 float_or_none,
34 get_element_by_id,
35 int_or_none,
36 mimetype2ext,
37 parse_codecs,
38 parse_count,
39 parse_duration,
40 remove_quotes,
41 remove_start,
42 smuggle_url,
43 str_or_none,
44 str_to_int,
45 try_get,
46 unescapeHTML,
47 unified_strdate,
48 unsmuggle_url,
49 update_url_query,
50 uppercase_escape,
51 url_or_none,
52 urlencode_postdata,
53 urljoin,
54 )
55
56
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of Google's "GlifWebSignIn" login flow
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|'
        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'

    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Force the English interface by setting the PREF cookie."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result entries for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Keep the documented contract: False means login failed
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow; Google prepends junk before
            # the JSON array, hence the transform_source stripping everything
            # up to the first '['.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # Opaque per-account token required by the challenge step
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Keep the documented contract: False means login failed
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesize the conditional so the prefix is kept for
            # unknown error codes as well (plain '%' binds tighter than
            # the conditional expression).
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # 'TL' token identifies this TFA session in _TFA_URL
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as the password warning above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved programmatically
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Fetching this URL finalizes the session cookies
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Copy the query dict so the caller's mapping is never mutated
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob embedded in a watch page.

        Returns the parsed dict, or None if the blob is absent or unparsable.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        # _downloader is None during e.g. --list-extractors
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal innertube context sent with every _call_api request
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

    def _call_api(self, ep, query, video_id):
        """POST to the innertube endpoint *ep* and return the decoded JSON.

        *query* is merged over _DEFAULT_API_DATA; the 'key' URL parameter is
        the public web-client innertube API key.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Like _get_yt_initial_data but fatal: raises if the blob is missing."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
330
331
332 class YoutubeIE(YoutubeBaseInfoExtractor):
333 IE_DESC = 'YouTube.com'
334 _VALID_URL = r"""(?x)^
335 (
336 (?:https?://|//) # http(s):// or protocol-independent URL
337 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
338 (?:www\.)?deturl\.com/www\.youtube\.com/|
339 (?:www\.)?pwnyoutube\.com/|
340 (?:www\.)?hooktube\.com/|
341 (?:www\.)?yourepeat\.com/|
342 tube\.majestyc\.net/|
343 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
344 (?:(?:www|dev)\.)?invidio\.us/|
345 (?:(?:www|no)\.)?invidiou\.sh/|
346 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
347 (?:www\.)?invidious\.kabi\.tk/|
348 (?:www\.)?invidious\.13ad\.de/|
349 (?:www\.)?invidious\.mastodon\.host/|
350 (?:www\.)?invidious\.nixnet\.xyz/|
351 (?:www\.)?invidious\.drycat\.fr/|
352 (?:www\.)?tube\.poal\.co/|
353 (?:www\.)?vid\.wxzm\.sx/|
354 (?:www\.)?yewtu\.be/|
355 (?:www\.)?yt\.elukerio\.org/|
356 (?:www\.)?yt\.lelux\.fi/|
357 (?:www\.)?invidious\.ggc-project\.de/|
358 (?:www\.)?yt\.maisputain\.ovh/|
359 (?:www\.)?invidious\.13ad\.de/|
360 (?:www\.)?invidious\.toot\.koeln/|
361 (?:www\.)?invidious\.fdn\.fr/|
362 (?:www\.)?watch\.nettohikari\.com/|
363 (?:www\.)?kgg2m7yk5aybusll\.onion/|
364 (?:www\.)?qklhadlycap4cnod\.onion/|
365 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
366 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
367 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
368 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
369 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
370 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
371 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
372 (?:.*?\#/)? # handle anchor (#/) redirect urls
373 (?: # the various things that can precede the ID:
374 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
375 |(?: # or the v= param in all its forms
376 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
377 (?:\?|\#!?) # the params delimiter ? or # or #!
378 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
379 v=
380 )
381 ))
382 |(?:
383 youtu\.be| # just youtu.be/xxxx
384 vid\.plus| # or vid.plus/xxxx
385 zwearz\.com/watch| # or zwearz.com/watch/xxxx
386 )/
387 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
388 )
389 )? # all until now is optional -> you can pass the naked ID
390 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
391 (?!.*?\blist=
392 (?:
393 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
394 WL # WL are handled by the watch later IE
395 )
396 )
397 (?(1).+)? # if we found the ID, everything can follow
398 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
399 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
400 _PLAYER_INFO_RE = (
401 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
402 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
403 )
404 _formats = {
405 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
406 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
407 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
408 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
409 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
410 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
411 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
412 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
413 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
414 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
415 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
416 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
417 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
418 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
419 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
420 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
421 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
422 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
423
424
425 # 3D videos
426 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
427 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
428 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
429 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
430 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
431 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
432 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
433
434 # Apple HTTP Live Streaming
435 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
436 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
437 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
438 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
439 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
440 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
441 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
442 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
443
444 # DASH mp4 video
445 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
446 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
447 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
448 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
449 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
450 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
451 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
452 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
453 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
454 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
455 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
456 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
457
458 # Dash mp4 audio
459 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
460 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
461 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
462 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
463 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
464 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
465 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
466
467 # Dash webm
468 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
469 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
470 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
471 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
472 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
473 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
474 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
475 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
476 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
477 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
478 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
479 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
480 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
481 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
482 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
483 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
484 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
485 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
486 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
487 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
488 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
489 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
490
491 # Dash webm audio
492 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
493 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
494
495 # Dash webm audio with opus inside
496 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
497 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
498 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
499
500 # RTMP (unnamed)
501 '_rtmp': {'protocol': 'rtmp'},
502
503 # av01 video only formats sometimes served with "unknown" codecs
504 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
505 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
506 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
507 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
508 }
509 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
510
511 _GEO_BYPASS = False
512
513 IE_NAME = 'youtube'
514 _TESTS = [
515 {
516 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
517 'info_dict': {
518 'id': 'BaW_jenozKc',
519 'ext': 'mp4',
520 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
521 'uploader': 'Philipp Hagemeister',
522 'uploader_id': 'phihag',
523 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
524 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
525 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
526 'upload_date': '20121002',
527 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
528 'categories': ['Science & Technology'],
529 'tags': ['youtube-dl'],
530 'duration': 10,
531 'view_count': int,
532 'like_count': int,
533 'dislike_count': int,
534 'start_time': 1,
535 'end_time': 9,
536 }
537 },
538 {
539 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
540 'note': 'Embed-only video (#1746)',
541 'info_dict': {
542 'id': 'yZIXLfi8CZQ',
543 'ext': 'mp4',
544 'upload_date': '20120608',
545 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
546 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
547 'uploader': 'SET India',
548 'uploader_id': 'setindia',
549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
550 'age_limit': 18,
551 }
552 },
553 {
554 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
555 'note': 'Use the first video ID in the URL',
556 'info_dict': {
557 'id': 'BaW_jenozKc',
558 'ext': 'mp4',
559 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
560 'uploader': 'Philipp Hagemeister',
561 'uploader_id': 'phihag',
562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
563 'upload_date': '20121002',
564 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
565 'categories': ['Science & Technology'],
566 'tags': ['youtube-dl'],
567 'duration': 10,
568 'view_count': int,
569 'like_count': int,
570 'dislike_count': int,
571 },
572 'params': {
573 'skip_download': True,
574 },
575 },
576 {
577 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
578 'note': '256k DASH audio (format 141) via DASH manifest',
579 'info_dict': {
580 'id': 'a9LDPn-MO4I',
581 'ext': 'm4a',
582 'upload_date': '20121002',
583 'uploader_id': '8KVIDEO',
584 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
585 'description': '',
586 'uploader': '8KVIDEO',
587 'title': 'UHDTV TEST 8K VIDEO.mp4'
588 },
589 'params': {
590 'youtube_include_dash_manifest': True,
591 'format': '141',
592 },
593 'skip': 'format 141 not served anymore',
594 },
595 # DASH manifest with encrypted signature
596 {
597 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
598 'info_dict': {
599 'id': 'IB3lcPjvWLA',
600 'ext': 'm4a',
601 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
602 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
603 'duration': 244,
604 'uploader': 'AfrojackVEVO',
605 'uploader_id': 'AfrojackVEVO',
606 'upload_date': '20131011',
607 },
608 'params': {
609 'youtube_include_dash_manifest': True,
610 'format': '141/bestaudio[ext=m4a]',
611 },
612 },
613 # Controversy video
614 {
615 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
616 'info_dict': {
617 'id': 'T4XJQO3qol8',
618 'ext': 'mp4',
619 'duration': 219,
620 'upload_date': '20100909',
621 'uploader': 'Amazing Atheist',
622 'uploader_id': 'TheAmazingAtheist',
623 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
624 'title': 'Burning Everyone\'s Koran',
625 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
626 }
627 },
628 # Normal age-gate video (embed allowed)
629 {
630 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
631 'info_dict': {
632 'id': 'HtVdAasjOgU',
633 'ext': 'mp4',
634 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
635 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
636 'duration': 142,
637 'uploader': 'The Witcher',
638 'uploader_id': 'WitcherGame',
639 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
640 'upload_date': '20140605',
641 'age_limit': 18,
642 },
643 },
644 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
645 # YouTube Red ad is not captured for creator
646 {
647 'url': '__2ABJjxzNo',
648 'info_dict': {
649 'id': '__2ABJjxzNo',
650 'ext': 'mp4',
651 'duration': 266,
652 'upload_date': '20100430',
653 'uploader_id': 'deadmau5',
654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
655 'creator': 'Dada Life, deadmau5',
656 'description': 'md5:12c56784b8032162bb936a5f76d55360',
657 'uploader': 'deadmau5',
658 'title': 'Deadmau5 - Some Chords (HD)',
659 'alt_title': 'This Machine Kills Some Chords',
660 },
661 'expected_warnings': [
662 'DASH manifest missing',
663 ]
664 },
665 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
666 {
667 'url': 'lqQg6PlCWgI',
668 'info_dict': {
669 'id': 'lqQg6PlCWgI',
670 'ext': 'mp4',
671 'duration': 6085,
672 'upload_date': '20150827',
673 'uploader_id': 'olympic',
674 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
675 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
676 'uploader': 'Olympic',
677 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
678 },
679 'params': {
680 'skip_download': 'requires avconv',
681 }
682 },
683 # Non-square pixels
684 {
685 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
686 'info_dict': {
687 'id': '_b-2C3KPAM0',
688 'ext': 'mp4',
689 'stretched_ratio': 16 / 9.,
690 'duration': 85,
691 'upload_date': '20110310',
692 'uploader_id': 'AllenMeow',
693 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
694 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
695 'uploader': '孫ᄋᄅ',
696 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
697 },
698 },
699 # url_encoded_fmt_stream_map is empty string
700 {
701 'url': 'qEJwOuvDf7I',
702 'info_dict': {
703 'id': 'qEJwOuvDf7I',
704 'ext': 'webm',
705 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
706 'description': '',
707 'upload_date': '20150404',
708 'uploader_id': 'spbelect',
709 'uploader': 'Наблюдатели Петербурга',
710 },
711 'params': {
712 'skip_download': 'requires avconv',
713 },
714 'skip': 'This live event has ended.',
715 },
716 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
717 {
718 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
719 'info_dict': {
720 'id': 'FIl7x6_3R5Y',
721 'ext': 'webm',
722 'title': 'md5:7b81415841e02ecd4313668cde88737a',
723 'description': 'md5:116377fd2963b81ec4ce64b542173306',
724 'duration': 220,
725 'upload_date': '20150625',
726 'uploader_id': 'dorappi2000',
727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
728 'uploader': 'dorappi2000',
729 'formats': 'mincount:31',
730 },
731 'skip': 'not actual anymore',
732 },
733 # DASH manifest with segment_list
734 {
735 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
736 'md5': '8ce563a1d667b599d21064e982ab9e31',
737 'info_dict': {
738 'id': 'CsmdDsKjzN8',
739 'ext': 'mp4',
740 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
741 'uploader': 'Airtek',
742 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
743 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
744 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
745 },
746 'params': {
747 'youtube_include_dash_manifest': True,
748 'format': '135', # bestvideo
749 },
750 'skip': 'This live event has ended.',
751 },
752 {
753 # Multifeed videos (multiple cameras), URL is for Main Camera
754 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
755 'info_dict': {
756 'id': 'jqWvoWXjCVs',
757 'title': 'teamPGP: Rocket League Noob Stream',
758 'description': 'md5:dc7872fb300e143831327f1bae3af010',
759 },
760 'playlist': [{
761 'info_dict': {
762 'id': 'jqWvoWXjCVs',
763 'ext': 'mp4',
764 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
765 'description': 'md5:dc7872fb300e143831327f1bae3af010',
766 'duration': 7335,
767 'upload_date': '20150721',
768 'uploader': 'Beer Games Beer',
769 'uploader_id': 'beergamesbeer',
770 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
771 'license': 'Standard YouTube License',
772 },
773 }, {
774 'info_dict': {
775 'id': '6h8e8xoXJzg',
776 'ext': 'mp4',
777 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
778 'description': 'md5:dc7872fb300e143831327f1bae3af010',
779 'duration': 7337,
780 'upload_date': '20150721',
781 'uploader': 'Beer Games Beer',
782 'uploader_id': 'beergamesbeer',
783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
784 'license': 'Standard YouTube License',
785 },
786 }, {
787 'info_dict': {
788 'id': 'PUOgX5z9xZw',
789 'ext': 'mp4',
790 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
791 'description': 'md5:dc7872fb300e143831327f1bae3af010',
792 'duration': 7337,
793 'upload_date': '20150721',
794 'uploader': 'Beer Games Beer',
795 'uploader_id': 'beergamesbeer',
796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
797 'license': 'Standard YouTube License',
798 },
799 }, {
800 'info_dict': {
801 'id': 'teuwxikvS5k',
802 'ext': 'mp4',
803 'title': 'teamPGP: Rocket League Noob Stream (zim)',
804 'description': 'md5:dc7872fb300e143831327f1bae3af010',
805 'duration': 7334,
806 'upload_date': '20150721',
807 'uploader': 'Beer Games Beer',
808 'uploader_id': 'beergamesbeer',
809 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
810 'license': 'Standard YouTube License',
811 },
812 }],
813 'params': {
814 'skip_download': True,
815 },
816 'skip': 'This video is not available.',
817 },
818 {
819 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
820 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
821 'info_dict': {
822 'id': 'gVfLd0zydlo',
823 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
824 },
825 'playlist_count': 2,
826 'skip': 'Not multifeed anymore',
827 },
828 {
829 'url': 'https://vid.plus/FlRa-iH7PGw',
830 'only_matching': True,
831 },
832 {
833 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
834 'only_matching': True,
835 },
836 {
837 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
838 # Also tests cut-off URL expansion in video description (see
839 # https://github.com/ytdl-org/youtube-dl/issues/1892,
840 # https://github.com/ytdl-org/youtube-dl/issues/8164)
841 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
842 'info_dict': {
843 'id': 'lsguqyKfVQg',
844 'ext': 'mp4',
845 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
846 'alt_title': 'Dark Walk - Position Music',
847 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
848 'duration': 133,
849 'upload_date': '20151119',
850 'uploader_id': 'IronSoulElf',
851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
852 'uploader': 'IronSoulElf',
853 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
854 'track': 'Dark Walk - Position Music',
855 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
856 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
857 },
858 'params': {
859 'skip_download': True,
860 },
861 },
862 {
863 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
864 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
865 'only_matching': True,
866 },
867 {
868 # Video with yt:stretch=17:0
869 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
870 'info_dict': {
871 'id': 'Q39EVAstoRM',
872 'ext': 'mp4',
873 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
874 'description': 'md5:ee18a25c350637c8faff806845bddee9',
875 'upload_date': '20151107',
876 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
877 'uploader': 'CH GAMER DROID',
878 },
879 'params': {
880 'skip_download': True,
881 },
882 'skip': 'This video does not exist.',
883 },
884 {
885 # Video licensed under Creative Commons
886 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
887 'info_dict': {
888 'id': 'M4gD1WSo5mA',
889 'ext': 'mp4',
890 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
891 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
892 'duration': 721,
893 'upload_date': '20150127',
894 'uploader_id': 'BerkmanCenter',
895 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
896 'uploader': 'The Berkman Klein Center for Internet & Society',
897 'license': 'Creative Commons Attribution license (reuse allowed)',
898 },
899 'params': {
900 'skip_download': True,
901 },
902 },
903 {
904 # Channel-like uploader_url
905 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
906 'info_dict': {
907 'id': 'eQcmzGIKrzg',
908 'ext': 'mp4',
909 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
910 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
911 'duration': 4060,
912 'upload_date': '20151119',
913 'uploader': 'Bernie Sanders',
914 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
916 'license': 'Creative Commons Attribution license (reuse allowed)',
917 },
918 'params': {
919 'skip_download': True,
920 },
921 },
922 {
923 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
924 'only_matching': True,
925 },
926 {
927 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
928 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
929 'only_matching': True,
930 },
931 {
932 # Rental video preview
933 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
934 'info_dict': {
935 'id': 'uGpuVWrhIzE',
936 'ext': 'mp4',
937 'title': 'Piku - Trailer',
938 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
939 'upload_date': '20150811',
940 'uploader': 'FlixMatrix',
941 'uploader_id': 'FlixMatrixKaravan',
942 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
943 'license': 'Standard YouTube License',
944 },
945 'params': {
946 'skip_download': True,
947 },
948 'skip': 'This video is not available.',
949 },
950 {
951 # YouTube Red video with episode data
952 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
953 'info_dict': {
954 'id': 'iqKdEhx-dD4',
955 'ext': 'mp4',
956 'title': 'Isolation - Mind Field (Ep 1)',
957 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
958 'duration': 2085,
959 'upload_date': '20170118',
960 'uploader': 'Vsauce',
961 'uploader_id': 'Vsauce',
962 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
963 'series': 'Mind Field',
964 'season_number': 1,
965 'episode_number': 1,
966 },
967 'params': {
968 'skip_download': True,
969 },
970 'expected_warnings': [
971 'Skipping DASH manifest',
972 ],
973 },
974 {
975 # The following content has been identified by the YouTube community
976 # as inappropriate or offensive to some audiences.
977 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
978 'info_dict': {
979 'id': '6SJNVb0GnPI',
980 'ext': 'mp4',
981 'title': 'Race Differences in Intelligence',
982 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
983 'duration': 965,
984 'upload_date': '20140124',
985 'uploader': 'New Century Foundation',
986 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
987 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
988 },
989 'params': {
990 'skip_download': True,
991 },
992 },
993 {
994 # itag 212
995 'url': '1t24XAntNCY',
996 'only_matching': True,
997 },
998 {
999 # geo restricted to JP
1000 'url': 'sJL6WA-aGkQ',
1001 'only_matching': True,
1002 },
1003 {
1004 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1005 'only_matching': True,
1006 },
1007 {
1008 # DRM protected
1009 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1010 'only_matching': True,
1011 },
1012 {
1013 # Video with unsupported adaptive stream type formats
1014 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1015 'info_dict': {
1016 'id': 'Z4Vy8R84T1U',
1017 'ext': 'mp4',
1018 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1019 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1020 'duration': 433,
1021 'upload_date': '20130923',
1022 'uploader': 'Amelia Putri Harwita',
1023 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1025 'formats': 'maxcount:10',
1026 },
1027 'params': {
1028 'skip_download': True,
1029 'youtube_include_dash_manifest': False,
1030 },
1031 'skip': 'not actual anymore',
1032 },
1033 {
1034 # Youtube Music Auto-generated description
1035 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1036 'info_dict': {
1037 'id': 'MgNrAu2pzNs',
1038 'ext': 'mp4',
1039 'title': 'Voyeur Girl',
1040 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1041 'upload_date': '20190312',
1042 'uploader': 'Stephen - Topic',
1043 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1044 'artist': 'Stephen',
1045 'track': 'Voyeur Girl',
1046 'album': 'it\'s too much love to know my dear',
1047 'release_date': '20190313',
1048 'release_year': 2019,
1049 },
1050 'params': {
1051 'skip_download': True,
1052 },
1053 },
1054 {
1055 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1056 'only_matching': True,
1057 },
1058 {
1059 # invalid -> valid video id redirection
1060 'url': 'DJztXj2GPfl',
1061 'info_dict': {
1062 'id': 'DJztXj2GPfk',
1063 'ext': 'mp4',
1064 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1065 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1066 'upload_date': '20090125',
1067 'uploader': 'Prochorowka',
1068 'uploader_id': 'Prochorowka',
1069 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1070 'artist': 'Panjabi MC',
1071 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1072 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1073 },
1074 'params': {
1075 'skip_download': True,
1076 },
1077 },
1078 {
1079 # empty description results in an empty string
1080 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1081 'info_dict': {
1082 'id': 'x41yOUIvK2k',
1083 'ext': 'mp4',
1084 'title': 'IMG 3456',
1085 'description': '',
1086 'upload_date': '20170613',
1087 'uploader_id': 'ElevageOrVert',
1088 'uploader': 'ElevageOrVert',
1089 },
1090 'params': {
1091 'skip_download': True,
1092 },
1093 },
1094 {
1095 # with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
1096 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1097 'info_dict': {
1098 'id': 'CHqg6qOn4no',
1099 'ext': 'mp4',
1100 'title': 'Part 77 Sort a list of simple types in c#',
1101 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1102 'upload_date': '20130831',
1103 'uploader_id': 'kudvenkat',
1104 'uploader': 'kudvenkat',
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
1109 },
1110 ]
1111
    def __init__(self, *args, **kwargs):
        """Initialize the extractor; _player_cache memoizes deciphered
        signature functions per (player_url, signature layout) — see
        _decrypt_signature."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._player_cache = {}
1115
1116 def report_video_info_webpage_download(self, video_id):
1117 """Report attempt to download video info webpage."""
1118 self.to_screen('%s: Downloading video info webpage' % video_id)
1119
1120 def report_information_extraction(self, video_id):
1121 """Report attempt to extract video information."""
1122 self.to_screen('%s: Extracting video information' % video_id)
1123
1124 def report_unavailable_format(self, video_id, format):
1125 """Report extracted video URL."""
1126 self.to_screen('%s: Format %s not available' % (video_id, format))
1127
1128 def report_rtmp_download(self):
1129 """Indicate the download will use the RTMP protocol."""
1130 self.to_screen('RTMP download detected')
1131
1132 def _signature_cache_id(self, example_sig):
1133 """ Return a string representation of a signature """
1134 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1135
1136 @classmethod
1137 def _extract_player_info(cls, player_url):
1138 for player_re in cls._PLAYER_INFO_RE:
1139 id_m = re.search(player_re, player_url)
1140 if id_m:
1141 break
1142 else:
1143 raise ExtractorError('Cannot identify player %r' % player_url)
1144 return id_m.group('ext'), id_m.group('id')
1145
1146 def _extract_signature_function(self, video_id, player_url, example_sig):
1147 player_type, player_id = self._extract_player_info(player_url)
1148
1149 # Read from filesystem cache
1150 func_id = '%s_%s_%s' % (
1151 player_type, player_id, self._signature_cache_id(example_sig))
1152 assert os.path.basename(func_id) == func_id
1153
1154 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1155 if cache_spec is not None:
1156 return lambda s: ''.join(s[i] for i in cache_spec)
1157
1158 download_note = (
1159 'Downloading player %s' % player_url
1160 if self._downloader.params.get('verbose') else
1161 'Downloading %s player %s' % (player_type, player_id)
1162 )
1163 if player_type == 'js':
1164 code = self._download_webpage(
1165 player_url, video_id,
1166 note=download_note,
1167 errnote='Download of %s failed' % player_url)
1168 res = self._parse_sig_js(code)
1169 elif player_type == 'swf':
1170 urlh = self._request_webpage(
1171 player_url, video_id,
1172 note=download_note,
1173 errnote='Download of %s failed' % player_url)
1174 code = urlh.read()
1175 res = self._parse_sig_swf(code)
1176 else:
1177 assert False, 'Invalid player type %r' % player_type
1178
1179 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1180 cache_res = res(test_string)
1181 cache_spec = [ord(c) for c in cache_res]
1182
1183 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1184 return res
1185
    def _print_sig_code(self, func, example_sig):
        """Print Python source reproducing *func* (a signature permutation)
        as a compact chain of slice/index expressions, for embedding a
        deciphering routine directly in code."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting defaults
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, coalescing +1/-1 runs into slices
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a run: extend it or flush it
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new run that can become a slice
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or run.
            # NOTE(review): relies on `i` leaking out of the loop — assumes
            # idxs has at least two entries (signatures are never that short)
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe with a string of distinct characters to observe the permutation
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1224
    def _parse_sig_js(self, jscode):
        """Locate the signature-deciphering function in the JS player code
        and return a callable mapping a scrambled signature string to its
        deciphered form, backed by the bundled JS interpreter.

        Patterns are ordered most-specific first; the obsolete ones are kept
        for older player revisions.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The extracted JS function takes its argument list as a Python list
        return lambda s: initial_function([s])
1245
1246 def _parse_sig_swf(self, file_contents):
1247 swfi = SWFInterpreter(file_contents)
1248 TARGET_CLASSNAME = 'SignatureDecipher'
1249 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1250 initial_function = swfi.extract_function(searched_class, 'decipher')
1251 return lambda s: initial_function([s])
1252
1253 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1254 """Turn the encrypted s field into a working signature"""
1255
1256 if player_url is None:
1257 raise ExtractorError('Cannot decrypt signature without player_url')
1258
1259 if player_url.startswith('//'):
1260 player_url = 'https:' + player_url
1261 elif not re.match(r'https?://', player_url):
1262 player_url = compat_urlparse.urljoin(
1263 'https://www.youtube.com', player_url)
1264 try:
1265 player_id = (player_url, self._signature_cache_id(s))
1266 if player_id not in self._player_cache:
1267 func = self._extract_signature_function(
1268 video_id, player_url, s
1269 )
1270 self._player_cache[player_id] = func
1271 func = self._player_cache[player_id]
1272 if self._downloader.params.get('youtube_print_sig_code'):
1273 self._print_sig_code(func, s)
1274 return func(s)
1275 except Exception as e:
1276 tb = traceback.format_exc()
1277 raise ExtractorError(
1278 'Signature extraction failed: ' + tb, cause=e)
1279
1280 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1281 try:
1282 subs_doc = self._download_xml(
1283 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1284 video_id, note=False)
1285 except ExtractorError as err:
1286 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1287 return {}
1288
1289 sub_lang_list = {}
1290 for track in subs_doc.findall('track'):
1291 lang = track.attrib['lang_code']
1292 if lang in sub_lang_list:
1293 continue
1294 sub_formats = []
1295 for ext in self._SUBTITLE_FORMATS:
1296 params = compat_urllib_parse_urlencode({
1297 'lang': lang,
1298 'v': video_id,
1299 'fmt': ext,
1300 'name': track.attrib['name'].encode('utf-8'),
1301 })
1302 sub_formats.append({
1303 'url': 'https://www.youtube.com/api/timedtext?' + params,
1304 'ext': ext,
1305 })
1306 sub_lang_list[lang] = sub_formats
1307 if has_live_chat_replay:
1308 sub_lang_list['live_chat'] = [
1309 {
1310 'video_id': video_id,
1311 'ext': 'json',
1312 'protocol': 'youtube_live_chat_replay',
1313 },
1314 ]
1315 if not sub_lang_list:
1316 self._downloader.report_warning('video doesn\'t have subtitles')
1317 return {}
1318 return sub_lang_list
1319
1320 def _get_ytplayer_config(self, video_id, webpage):
1321 patterns = (
1322 # User data may contain arbitrary character sequences that may affect
1323 # JSON extraction with regex, e.g. when '};' is contained the second
1324 # regex won't capture the whole JSON. Yet working around by trying more
1325 # concrete regex first keeping in mind proper quoted string handling
1326 # to be implemented in future that will replace this workaround (see
1327 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1328 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1329 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1330 r';ytplayer\.config\s*=\s*({.+?});',
1331 r'ytInitialPlayerResponse\s*=\s*({.+?});var meta' # Needed???
1332 )
1333 config = self._search_regex(
1334 patterns, webpage, 'ytplayer.config', default=None)
1335 if config:
1336 return self._parse_json(
1337 uppercase_escape(config), video_id, fatal=False)
1338
1339 def _get_music_metadata_from_yt_initial(self, yt_initial):
1340 music_metadata = []
1341 key_map = {
1342 'Album': 'album',
1343 'Artist': 'artist',
1344 'Song': 'track'
1345 }
1346 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1347 if type(contents) is list:
1348 for content in contents:
1349 music_track = {}
1350 if type(content) is not dict:
1351 continue
1352 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1353 if type(videoSecondaryInfoRenderer) is not dict:
1354 continue
1355 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1356 if type(rows) is not list:
1357 continue
1358 for row in rows:
1359 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1360 if type(metadataRowRenderer) is not dict:
1361 continue
1362 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1363 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1364 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1365 if type(key) is not str or type(value) is not str:
1366 continue
1367 if key in key_map:
1368 if key_map[key] in music_track:
1369 # we've started on a new track
1370 music_metadata.append(music_track)
1371 music_track = {}
1372 music_track[key_map[key]] = value
1373 if len(music_track.keys()):
1374 music_metadata.append(music_track)
1375 return music_metadata
1376
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns a dict mapping language code to a list of caption format
        dicts, or {} (after a warning) when no automatic captions exist.
        Three historical caption APIs are tried in order: ttsurl,
        player_response captionTracks, and legacy caption_tracks args.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                # Oldest API: a ttsurl endpoint listing available tracks
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                # One set of formats per translation target language
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the query of
                # a single base caption URL (tlang/fmt vary per entry)
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # No longer used as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An ExtractorError can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1478
1479 def _mark_watched(self, video_id, video_info, player_response):
1480 playback_url = url_or_none(try_get(
1481 player_response,
1482 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1483 video_info, lambda x: x['videostats_playback_base_url'][0]))
1484 if not playback_url:
1485 return
1486 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1487 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1488
1489 # cpn generation algorithm is reverse engineered from base.js.
1490 # In fact it works even with dummy cpn.
1491 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1492 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1493
1494 qs.update({
1495 'ver': ['2'],
1496 'cpn': [cpn],
1497 })
1498 playback_url = compat_urlparse.urlunparse(
1499 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1500
1501 self._download_webpage(
1502 playback_url, video_id, 'Marking watched',
1503 'Unable to mark watched', fatal=False)
1504
1505 @staticmethod
1506 def _extract_urls(webpage):
1507 # Embedded YouTube player
1508 entries = [
1509 unescapeHTML(mobj.group('url'))
1510 for mobj in re.finditer(r'''(?x)
1511 (?:
1512 <iframe[^>]+?src=|
1513 data-video-url=|
1514 <embed[^>]+?src=|
1515 embedSWF\(?:\s*|
1516 <object[^>]+data=|
1517 new\s+SWFObject\(
1518 )
1519 (["\'])
1520 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1521 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1522 \1''', webpage)]
1523
1524 # lazyYT YouTube embed
1525 entries.extend(list(map(
1526 unescapeHTML,
1527 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1528
1529 # Wordpress "YouTube Video Importer" plugin
1530 matches = re.findall(r'''(?x)<div[^>]+
1531 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1532 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1533 entries.extend(m[-1] for m in matches)
1534
1535 return entries
1536
1537 @staticmethod
1538 def _extract_url(webpage):
1539 urls = YoutubeIE._extract_urls(webpage)
1540 return urls[0] if urls else None
1541
1542 @classmethod
1543 def extract_id(cls, url):
1544 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1545 if mobj is None:
1546 raise ExtractorError('Invalid URL: %s' % url)
1547 video_id = mobj.group(2)
1548 return video_id
1549
1550 def _extract_chapters_from_json(self, webpage, video_id, duration):
1551 if not webpage:
1552 return
1553 data = self._extract_yt_initial_data(video_id, webpage)
1554 if not data or not isinstance(data, dict):
1555 return
1556 chapters_list = try_get(
1557 data,
1558 lambda x: x['playerOverlays']
1559 ['playerOverlayRenderer']
1560 ['decoratedPlayerBarRenderer']
1561 ['decoratedPlayerBarRenderer']
1562 ['playerBar']
1563 ['chapteredPlayerBarRenderer']
1564 ['chapters'],
1565 list)
1566 if not chapters_list:
1567 return
1568
1569 def chapter_time(chapter):
1570 return float_or_none(
1571 try_get(
1572 chapter,
1573 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1574 int),
1575 scale=1000)
1576 chapters = []
1577 for next_num, chapter in enumerate(chapters_list, start=1):
1578 start_time = chapter_time(chapter)
1579 if start_time is None:
1580 continue
1581 end_time = (chapter_time(chapters_list[next_num])
1582 if next_num < len(chapters_list) else duration)
1583 if end_time is None:
1584 continue
1585 title = try_get(
1586 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1587 compat_str)
1588 chapters.append({
1589 'start_time': start_time,
1590 'end_time': end_time,
1591 'title': title,
1592 })
1593 return chapters
1594
1595 @staticmethod
1596 def _extract_chapters_from_description(description, duration):
1597 if not description:
1598 return None
1599 chapter_lines = re.findall(
1600 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1601 description)
1602 if not chapter_lines:
1603 return None
1604 chapters = []
1605 for next_num, (chapter_line, time_point) in enumerate(
1606 chapter_lines, start=1):
1607 start_time = parse_duration(time_point)
1608 if start_time is None:
1609 continue
1610 if start_time > duration:
1611 break
1612 end_time = (duration if next_num == len(chapter_lines)
1613 else parse_duration(chapter_lines[next_num][1]))
1614 if end_time is None:
1615 continue
1616 if end_time > duration:
1617 end_time = duration
1618 if start_time > end_time:
1619 break
1620 chapter_title = re.sub(
1621 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1622 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1623 chapters.append({
1624 'start_time': start_time,
1625 'end_time': end_time,
1626 'title': chapter_title,
1627 })
1628 return chapters
1629
1630 def _extract_chapters(self, webpage, description, video_id, duration):
1631 return (self._extract_chapters_from_json(webpage, video_id, duration)
1632 or self._extract_chapters_from_description(description, duration))
1633
1634 def _real_extract(self, url):
1635 url, smuggled_data = unsmuggle_url(url, {})
1636
1637 proto = (
1638 'http' if self._downloader.params.get('prefer_insecure', False)
1639 else 'https')
1640
1641 start_time = None
1642 end_time = None
1643 parsed_url = compat_urllib_parse_urlparse(url)
1644 for component in [parsed_url.fragment, parsed_url.query]:
1645 query = compat_parse_qs(component)
1646 if start_time is None and 't' in query:
1647 start_time = parse_duration(query['t'][0])
1648 if start_time is None and 'start' in query:
1649 start_time = parse_duration(query['start'][0])
1650 if end_time is None and 'end' in query:
1651 end_time = parse_duration(query['end'][0])
1652
1653 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1654 mobj = re.search(self._NEXT_URL_RE, url)
1655 if mobj:
1656 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1657 video_id = self.extract_id(url)
1658
1659 # Get video webpage
1660 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1661 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1662
1663 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1664 video_id = qs.get('v', [None])[0] or video_id
1665
1666 # Attempt to extract SWF player URL
1667 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1668 if mobj is not None:
1669 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1670 else:
1671 player_url = None
1672
1673 dash_mpds = []
1674
1675 def add_dash_mpd(video_info):
1676 dash_mpd = video_info.get('dashmpd')
1677 if dash_mpd and dash_mpd[0] not in dash_mpds:
1678 dash_mpds.append(dash_mpd[0])
1679
1680 def add_dash_mpd_pr(pl_response):
1681 dash_mpd = url_or_none(try_get(
1682 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1683 compat_str))
1684 if dash_mpd and dash_mpd not in dash_mpds:
1685 dash_mpds.append(dash_mpd)
1686
1687 is_live = None
1688 view_count = None
1689
1690 def extract_view_count(v_info):
1691 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1692
1693 def extract_player_response(player_response, video_id):
1694 pl_response = str_or_none(player_response)
1695 if not pl_response:
1696 return
1697 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1698 if isinstance(pl_response, dict):
1699 add_dash_mpd_pr(pl_response)
1700 return pl_response
1701
1702 def extract_embedded_config(embed_webpage, video_id):
1703 embedded_config = self._search_regex(
1704 r'setConfig\(({.*})\);',
1705 embed_webpage, 'ytInitialData', default=None)
1706 if embedded_config:
1707 return embedded_config
1708
1709 player_response = {}
1710
1711 # Get video info
1712 video_info = {}
1713 embed_webpage = None
1714 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1715 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1716 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1717 age_gate = True
1718 # We simulate the access to the video from www.youtube.com/v/{video_id}
1719 # this can be viewed without login into Youtube
1720 url = proto + '://www.youtube.com/embed/%s' % video_id
1721 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1722 ext = extract_embedded_config(embed_webpage, video_id)
1723 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1724 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1725 if not playable_in_embed:
1726 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1727 playable_in_embed = ''
1728 else:
1729 playable_in_embed = playable_in_embed.group('playableinEmbed')
1730 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1731 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1732 if playable_in_embed == 'false':
1733 '''
1734 # TODO apply this patch when Support for Python 2.6(!) and above drops
1735 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1736 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1737 '''
1738 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1739 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1740 age_gate = False
1741 # Try looking directly into the video webpage
1742 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1743 if ytplayer_config:
1744 args = ytplayer_config.get("args")
1745 if args is not None:
1746 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1747 # Convert to the same format returned by compat_parse_qs
1748 video_info = dict((k, [v]) for k, v in args.items())
1749 add_dash_mpd(video_info)
1750 # Rental video is not rented but preview is available (e.g.
1751 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1752 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1753 if not video_info and args.get('ypc_vid'):
1754 return self.url_result(
1755 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1756 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1757 is_live = True
1758 if not player_response:
1759 player_response = extract_player_response(args.get('player_response'), video_id)
1760 elif not player_response:
1761 player_response = ytplayer_config
1762 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1763 add_dash_mpd_pr(player_response)
1764 else:
1765 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1766 else:
1767 data = compat_urllib_parse_urlencode({
1768 'video_id': video_id,
1769 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1770 'sts': self._search_regex(
1771 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1772 })
1773 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1774 try:
1775 video_info_webpage = self._download_webpage(
1776 video_info_url, video_id,
1777 note='Refetching age-gated info webpage',
1778 errnote='unable to download video info webpage')
1779 except ExtractorError:
1780 video_info_webpage = None
1781 if video_info_webpage:
1782 video_info = compat_parse_qs(video_info_webpage)
1783 pl_response = video_info.get('player_response', [None])[0]
1784 player_response = extract_player_response(pl_response, video_id)
1785 add_dash_mpd(video_info)
1786 view_count = extract_view_count(video_info)
1787 else:
1788 age_gate = False
1789 # Try looking directly into the video webpage
1790 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1791 if ytplayer_config:
1792 args = ytplayer_config.get('args', {})
1793 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1794 # Convert to the same format returned by compat_parse_qs
1795 video_info = dict((k, [v]) for k, v in args.items())
1796 add_dash_mpd(video_info)
1797 # Rental video is not rented but preview is available (e.g.
1798 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1799 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1800 if not video_info and args.get('ypc_vid'):
1801 return self.url_result(
1802 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1803 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1804 is_live = True
1805 if not player_response:
1806 player_response = extract_player_response(args.get('player_response'), video_id)
1807 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1808 add_dash_mpd_pr(player_response)
1809
1810 if not video_info and not player_response:
1811 player_response = extract_player_response(
1812 self._search_regex(
1813 r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
1814 'initial player response', default='{}'),
1815 video_id)
1816
1817 def extract_unavailable_message():
1818 messages = []
1819 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1820 msg = self._html_search_regex(
1821 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1822 video_webpage, 'unavailable %s' % kind, default=None)
1823 if msg:
1824 messages.append(msg)
1825 if messages:
1826 return '\n'.join(messages)
1827
1828 if not video_info and not player_response:
1829 unavailable_message = extract_unavailable_message()
1830 if not unavailable_message:
1831 unavailable_message = 'Unable to extract video data'
1832 raise ExtractorError(
1833 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1834
1835 if not isinstance(video_info, dict):
1836 video_info = {}
1837
1838 video_details = try_get(
1839 player_response, lambda x: x['videoDetails'], dict) or {}
1840
1841 microformat = try_get(
1842 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1843
1844 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1845 if not video_title:
1846 self._downloader.report_warning('Unable to extract video title')
1847 video_title = '_'
1848
1849 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1850 if video_description:
1851
1852 def replace_url(m):
1853 redir_url = compat_urlparse.urljoin(url, m.group(1))
1854 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1855 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1856 qs = compat_parse_qs(parsed_redir_url.query)
1857 q = qs.get('q')
1858 if q and q[0]:
1859 return q[0]
1860 return redir_url
1861
1862 description_original = video_description = re.sub(r'''(?x)
1863 <a\s+
1864 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1865 (?:title|href)="([^"]+)"\s+
1866 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1867 class="[^"]*"[^>]*>
1868 [^<]+\.{3}\s*
1869 </a>
1870 ''', replace_url, video_description)
1871 video_description = clean_html(video_description)
1872 else:
1873 video_description = video_details.get('shortDescription')
1874 if video_description is None:
1875 video_description = self._html_search_meta('description', video_webpage)
1876
1877 if not smuggled_data.get('force_singlefeed', False):
1878 if not self._downloader.params.get('noplaylist'):
1879 multifeed_metadata_list = try_get(
1880 player_response,
1881 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1882 compat_str) or try_get(
1883 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1884 if multifeed_metadata_list:
1885 entries = []
1886 feed_ids = []
1887 for feed in multifeed_metadata_list.split(','):
1888 # Unquote should take place before split on comma (,) since textual
1889 # fields may contain comma as well (see
1890 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1891 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1892
1893 def feed_entry(name):
1894 return try_get(feed_data, lambda x: x[name][0], compat_str)
1895
1896 feed_id = feed_entry('id')
1897 if not feed_id:
1898 continue
1899 feed_title = feed_entry('title')
1900 title = video_title
1901 if feed_title:
1902 title += ' (%s)' % feed_title
1903 entries.append({
1904 '_type': 'url_transparent',
1905 'ie_key': 'Youtube',
1906 'url': smuggle_url(
1907 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1908 {'force_singlefeed': True}),
1909 'title': title,
1910 })
1911 feed_ids.append(feed_id)
1912 self.to_screen(
1913 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1914 % (', '.join(feed_ids), video_id))
1915 return self.playlist_result(entries, video_id, video_title, video_description)
1916 else:
1917 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1918
1919 if view_count is None:
1920 view_count = extract_view_count(video_info)
1921 if view_count is None and video_details:
1922 view_count = int_or_none(video_details.get('viewCount'))
1923 if view_count is None and microformat:
1924 view_count = int_or_none(microformat.get('viewCount'))
1925
1926 if is_live is None:
1927 is_live = bool_or_none(video_details.get('isLive'))
1928
1929 has_live_chat_replay = False
1930 if not is_live:
1931 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1932 try:
1933 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1934 has_live_chat_replay = True
1935 except (KeyError, IndexError, TypeError):
1936 pass
1937
1938 # Check for "rental" videos
1939 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1940 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1941
1942 def _extract_filesize(media_url):
1943 return int_or_none(self._search_regex(
1944 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1945
1946 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1947 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1948
1949 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1950 self.report_rtmp_download()
1951 formats = [{
1952 'format_id': '_rtmp',
1953 'protocol': 'rtmp',
1954 'url': video_info['conn'][0],
1955 'player_url': player_url,
1956 }]
1957 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1958 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1959 if 'rtmpe%3Dyes' in encoded_url_map:
1960 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1961 formats = []
1962 formats_spec = {}
1963 fmt_list = video_info.get('fmt_list', [''])[0]
1964 if fmt_list:
1965 for fmt in fmt_list.split(','):
1966 spec = fmt.split('/')
1967 if len(spec) > 1:
1968 width_height = spec[1].split('x')
1969 if len(width_height) == 2:
1970 formats_spec[spec[0]] = {
1971 'resolution': spec[1],
1972 'width': int_or_none(width_height[0]),
1973 'height': int_or_none(width_height[1]),
1974 }
1975 for fmt in streaming_formats:
1976 itag = str_or_none(fmt.get('itag'))
1977 if not itag:
1978 continue
1979 quality = fmt.get('quality')
1980 quality_label = fmt.get('qualityLabel') or quality
1981 formats_spec[itag] = {
1982 'asr': int_or_none(fmt.get('audioSampleRate')),
1983 'filesize': int_or_none(fmt.get('contentLength')),
1984 'format_note': quality_label,
1985 'fps': int_or_none(fmt.get('fps')),
1986 'height': int_or_none(fmt.get('height')),
1987 # bitrate for itag 43 is always 2147483647
1988 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1989 'width': int_or_none(fmt.get('width')),
1990 }
1991
1992 for fmt in streaming_formats:
1993 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1994 continue
1995 url = url_or_none(fmt.get('url'))
1996
1997 if not url:
1998 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1999 if not cipher:
2000 continue
2001 url_data = compat_parse_qs(cipher)
2002 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2003 if not url:
2004 continue
2005 else:
2006 cipher = None
2007 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2008
2009 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2010 # Unsupported FORMAT_STREAM_TYPE_OTF
2011 if stream_type == 3:
2012 continue
2013
2014 format_id = fmt.get('itag') or url_data['itag'][0]
2015 if not format_id:
2016 continue
2017 format_id = compat_str(format_id)
2018
2019 if cipher:
2020 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2021 ASSETS_RE = (
2022 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2023 r'"jsUrl"\s*:\s*("[^"]+")',
2024 r'"assets":.+?"js":\s*("[^"]+")')
2025 jsplayer_url_json = self._search_regex(
2026 ASSETS_RE,
2027 embed_webpage if age_gate else video_webpage,
2028 'JS player URL (1)', default=None)
2029 if not jsplayer_url_json and not age_gate:
2030 # We need the embed website after all
2031 if embed_webpage is None:
2032 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2033 embed_webpage = self._download_webpage(
2034 embed_url, video_id, 'Downloading embed webpage')
2035 jsplayer_url_json = self._search_regex(
2036 ASSETS_RE, embed_webpage, 'JS player URL')
2037
2038 player_url = json.loads(jsplayer_url_json)
2039 if player_url is None:
2040 player_url_json = self._search_regex(
2041 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2042 video_webpage, 'age gate player URL')
2043 player_url = json.loads(player_url_json)
2044
2045 if 'sig' in url_data:
2046 url += '&signature=' + url_data['sig'][0]
2047 elif 's' in url_data:
2048 encrypted_sig = url_data['s'][0]
2049
2050 if self._downloader.params.get('verbose'):
2051 if player_url is None:
2052 player_desc = 'unknown'
2053 else:
2054 player_type, player_version = self._extract_player_info(player_url)
2055 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2056 parts_sizes = self._signature_cache_id(encrypted_sig)
2057 self.to_screen('{%s} signature length %s, %s' %
2058 (format_id, parts_sizes, player_desc))
2059
2060 signature = self._decrypt_signature(
2061 encrypted_sig, video_id, player_url, age_gate)
2062 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2063 url += '&%s=%s' % (sp, signature)
2064 if 'ratebypass' not in url:
2065 url += '&ratebypass=yes'
2066
2067 dct = {
2068 'format_id': format_id,
2069 'url': url,
2070 'player_url': player_url,
2071 }
2072 if format_id in self._formats:
2073 dct.update(self._formats[format_id])
2074 if format_id in formats_spec:
2075 dct.update(formats_spec[format_id])
2076
2077 # Some itags are not included in DASH manifest thus corresponding formats will
2078 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2079 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2080 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2081 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2082
2083 if width is None:
2084 width = int_or_none(fmt.get('width'))
2085 if height is None:
2086 height = int_or_none(fmt.get('height'))
2087
2088 filesize = int_or_none(url_data.get(
2089 'clen', [None])[0]) or _extract_filesize(url)
2090
2091 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2092 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2093
2094 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2095 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2096 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2097
2098 more_fields = {
2099 'filesize': filesize,
2100 'tbr': tbr,
2101 'width': width,
2102 'height': height,
2103 'fps': fps,
2104 'format_note': quality_label or quality,
2105 }
2106 for key, value in more_fields.items():
2107 if value:
2108 dct[key] = value
2109 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2110 if type_:
2111 type_split = type_.split(';')
2112 kind_ext = type_split[0].split('/')
2113 if len(kind_ext) == 2:
2114 kind, _ = kind_ext
2115 dct['ext'] = mimetype2ext(type_split[0])
2116 if kind in ('audio', 'video'):
2117 codecs = None
2118 for mobj in re.finditer(
2119 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2120 if mobj.group('key') == 'codecs':
2121 codecs = mobj.group('val')
2122 break
2123 if codecs:
2124 dct.update(parse_codecs(codecs))
2125 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2126 dct['downloader_options'] = {
2127 # Youtube throttles chunks >~10M
2128 'http_chunk_size': 10485760,
2129 }
2130 formats.append(dct)
2131 else:
2132 manifest_url = (
2133 url_or_none(try_get(
2134 player_response,
2135 lambda x: x['streamingData']['hlsManifestUrl'],
2136 compat_str))
2137 or url_or_none(try_get(
2138 video_info, lambda x: x['hlsvp'][0], compat_str)))
2139 if manifest_url:
2140 formats = []
2141 m3u8_formats = self._extract_m3u8_formats(
2142 manifest_url, video_id, 'mp4', fatal=False)
2143 for a_format in m3u8_formats:
2144 itag = self._search_regex(
2145 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2146 if itag:
2147 a_format['format_id'] = itag
2148 if itag in self._formats:
2149 dct = self._formats[itag].copy()
2150 dct.update(a_format)
2151 a_format = dct
2152 a_format['player_url'] = player_url
2153 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2154 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2155 if self._downloader.params.get('youtube_include_hls_manifest', True):
2156 formats.append(a_format)
2157 else:
2158 error_message = extract_unavailable_message()
2159 if not error_message:
2160 reason_list = try_get(
2161 player_response,
2162 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2163 list) or []
2164 for reason in reason_list:
2165 if not isinstance(reason, dict):
2166 continue
2167 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2168 if reason_text:
2169 if not error_message:
2170 error_message = ''
2171 error_message += reason_text
2172 if error_message:
2173 error_message = clean_html(error_message)
2174 if not error_message:
2175 error_message = clean_html(try_get(
2176 player_response, lambda x: x['playabilityStatus']['reason'],
2177 compat_str))
2178 if not error_message:
2179 error_message = clean_html(
2180 try_get(video_info, lambda x: x['reason'][0], compat_str))
2181 if error_message:
2182 raise ExtractorError(error_message, expected=True)
2183 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2184
2185 # uploader
2186 video_uploader = try_get(
2187 video_info, lambda x: x['author'][0],
2188 compat_str) or str_or_none(video_details.get('author'))
2189 if video_uploader:
2190 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2191 else:
2192 self._downloader.report_warning('unable to extract uploader name')
2193
2194 # uploader_id
2195 video_uploader_id = None
2196 video_uploader_url = None
2197 mobj = re.search(
2198 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2199 video_webpage)
2200 if mobj is not None:
2201 video_uploader_id = mobj.group('uploader_id')
2202 video_uploader_url = mobj.group('uploader_url')
2203 else:
2204 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2205 if owner_profile_url:
2206 video_uploader_id = self._search_regex(
2207 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2208 default=None)
2209 video_uploader_url = owner_profile_url
2210
2211 channel_id = (
2212 str_or_none(video_details.get('channelId'))
2213 or self._html_search_meta(
2214 'channelId', video_webpage, 'channel id', default=None)
2215 or self._search_regex(
2216 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2217 video_webpage, 'channel id', default=None, group='id'))
2218 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2219
2220 thumbnails = []
2221 thumbnails_list = try_get(
2222 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2223 for t in thumbnails_list:
2224 if not isinstance(t, dict):
2225 continue
2226 thumbnail_url = url_or_none(t.get('url'))
2227 if not thumbnail_url:
2228 continue
2229 thumbnails.append({
2230 'url': thumbnail_url,
2231 'width': int_or_none(t.get('width')),
2232 'height': int_or_none(t.get('height')),
2233 })
2234
2235 if not thumbnails:
2236 video_thumbnail = None
2237 # We try first to get a high quality image:
2238 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2239 video_webpage, re.DOTALL)
2240 if m_thumb is not None:
2241 video_thumbnail = m_thumb.group(1)
2242 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2243 if thumbnail_url:
2244 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2245 if video_thumbnail:
2246 thumbnails.append({'url': video_thumbnail})
2247
2248 # upload date
2249 upload_date = self._html_search_meta(
2250 'datePublished', video_webpage, 'upload date', default=None)
2251 if not upload_date:
2252 upload_date = self._search_regex(
2253 [r'(?s)id="eow-date.*?>(.*?)</span>',
2254 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2255 video_webpage, 'upload date', default=None)
2256 if not upload_date:
2257 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2258 upload_date = unified_strdate(upload_date)
2259
2260 video_license = self._html_search_regex(
2261 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2262 video_webpage, 'license', default=None)
2263
2264 m_music = re.search(
2265 r'''(?x)
2266 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2267 <ul[^>]*>\s*
2268 <li>(?P<title>.+?)
2269 by (?P<creator>.+?)
2270 (?:
2271 \(.+?\)|
2272 <a[^>]*
2273 (?:
2274 \bhref=["\']/red[^>]*>| # drop possible
2275 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2276 )
2277 .*?
2278 )?</li
2279 ''',
2280 video_webpage)
2281 if m_music:
2282 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2283 video_creator = clean_html(m_music.group('creator'))
2284 else:
2285 video_alt_title = video_creator = None
2286
2287 def extract_meta(field):
2288 return self._html_search_regex(
2289 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2290 video_webpage, field, default=None)
2291
2292 track = extract_meta('Song')
2293 artist = extract_meta('Artist')
2294 album = extract_meta('Album')
2295
2296 # Youtube Music Auto-generated description
2297 release_date = release_year = None
2298 if video_description:
2299 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2300 if mobj:
2301 if not track:
2302 track = mobj.group('track').strip()
2303 if not artist:
2304 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2305 if not album:
2306 album = mobj.group('album'.strip())
2307 release_year = mobj.group('release_year')
2308 release_date = mobj.group('release_date')
2309 if release_date:
2310 release_date = release_date.replace('-', '')
2311 if not release_year:
2312 release_year = int(release_date[:4])
2313 if release_year:
2314 release_year = int(release_year)
2315
2316 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2317 if yt_initial:
2318 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2319 if len(music_metadata):
2320 album = music_metadata[0].get('album')
2321 artist = music_metadata[0].get('artist')
2322 track = music_metadata[0].get('track')
2323
2324 m_episode = re.search(
2325 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2326 video_webpage)
2327 if m_episode:
2328 series = unescapeHTML(m_episode.group('series'))
2329 season_number = int(m_episode.group('season'))
2330 episode_number = int(m_episode.group('episode'))
2331 else:
2332 series = season_number = episode_number = None
2333
2334 m_cat_container = self._search_regex(
2335 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2336 video_webpage, 'categories', default=None)
2337 category = None
2338 if m_cat_container:
2339 category = self._html_search_regex(
2340 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2341 default=None)
2342 if not category:
2343 category = try_get(
2344 microformat, lambda x: x['category'], compat_str)
2345 video_categories = None if category is None else [category]
2346
2347 video_tags = [
2348 unescapeHTML(m.group('content'))
2349 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2350 if not video_tags:
2351 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2352
2353 def _extract_count(count_name):
2354 return str_to_int(self._search_regex(
2355 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2356 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2357 video_webpage, count_name, default=None))
2358
2359 like_count = _extract_count('like')
2360 dislike_count = _extract_count('dislike')
2361
2362 if view_count is None:
2363 view_count = str_to_int(self._search_regex(
2364 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2365 'view count', default=None))
2366
2367 average_rating = (
2368 float_or_none(video_details.get('averageRating'))
2369 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2370
2371 # subtitles
2372 video_subtitles = self.extract_subtitles(
2373 video_id, video_webpage, has_live_chat_replay)
2374 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2375
2376 video_duration = try_get(
2377 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2378 if not video_duration:
2379 video_duration = int_or_none(video_details.get('lengthSeconds'))
2380 if not video_duration:
2381 video_duration = parse_duration(self._html_search_meta(
2382 'duration', video_webpage, 'video duration'))
2383
2384 # Get Subscriber Count of channel
2385 subscriber_count = parse_count(self._search_regex(
2386 r'"text":"([\d\.]+\w?) subscribers"',
2387 video_webpage,
2388 'subscriber count',
2389 default=None
2390 ))
2391
2392 # annotations
2393 video_annotations = None
2394 if self._downloader.params.get('writeannotations', False):
2395 xsrf_token = self._search_regex(
2396 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2397 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2398 invideo_url = try_get(
2399 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2400 if xsrf_token and invideo_url:
2401 xsrf_field_name = self._search_regex(
2402 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2403 video_webpage, 'xsrf field name',
2404 group='xsrf_field_name', default='session_token')
2405 video_annotations = self._download_webpage(
2406 self._proto_relative_url(invideo_url),
2407 video_id, note='Downloading annotations',
2408 errnote='Unable to download video annotations', fatal=False,
2409 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2410
2411 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2412
2413 # Look for the DASH manifest
2414 if self._downloader.params.get('youtube_include_dash_manifest', True):
2415 dash_mpd_fatal = True
2416 for mpd_url in dash_mpds:
2417 dash_formats = {}
2418 try:
2419 def decrypt_sig(mobj):
2420 s = mobj.group(1)
2421 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2422 return '/signature/%s' % dec_s
2423
2424 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2425
2426 for df in self._extract_mpd_formats(
2427 mpd_url, video_id, fatal=dash_mpd_fatal,
2428 formats_dict=self._formats):
2429 if not df.get('filesize'):
2430 df['filesize'] = _extract_filesize(df['url'])
2431 # Do not overwrite DASH format found in some previous DASH manifest
2432 if df['format_id'] not in dash_formats:
2433 dash_formats[df['format_id']] = df
2434 # Additional DASH manifests may end up in HTTP Error 403 therefore
2435 # allow them to fail without bug report message if we already have
2436 # some DASH manifest succeeded. This is temporary workaround to reduce
2437 # burst of bug reports until we figure out the reason and whether it
2438 # can be fixed at all.
2439 dash_mpd_fatal = False
2440 except (ExtractorError, KeyError) as e:
2441 self.report_warning(
2442 'Skipping DASH manifest: %r' % e, video_id)
2443 if dash_formats:
2444 # Remove the formats we found through non-DASH, they
2445 # contain less info and it can be wrong, because we use
2446 # fixed values (for example the resolution). See
2447 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2448 # example.
2449 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2450 formats.extend(dash_formats.values())
2451
2452 # Check for malformed aspect ratio
2453 stretched_m = re.search(
2454 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2455 video_webpage)
2456 if stretched_m:
2457 w = float(stretched_m.group('w'))
2458 h = float(stretched_m.group('h'))
2459 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2460 # We will only process correct ratios.
2461 if w > 0 and h > 0:
2462 ratio = w / h
2463 for f in formats:
2464 if f.get('vcodec') != 'none':
2465 f['stretched_ratio'] = ratio
2466
2467 if not formats:
2468 if 'reason' in video_info:
2469 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2470 regions_allowed = self._html_search_meta(
2471 'regionsAllowed', video_webpage, default=None)
2472 countries = regions_allowed.split(',') if regions_allowed else None
2473 self.raise_geo_restricted(
2474 msg=video_info['reason'][0], countries=countries)
2475 reason = video_info['reason'][0]
2476 if 'Invalid parameters' in reason:
2477 unavailable_message = extract_unavailable_message()
2478 if unavailable_message:
2479 reason = unavailable_message
2480 raise ExtractorError(
2481 'YouTube said: %s' % reason,
2482 expected=True, video_id=video_id)
2483 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2484 raise ExtractorError('This video is DRM protected.', expected=True)
2485
2486 self._sort_formats(formats)
2487
2488 self.mark_watched(video_id, video_info, player_response)
2489
2490 return {
2491 'id': video_id,
2492 'uploader': video_uploader,
2493 'uploader_id': video_uploader_id,
2494 'uploader_url': video_uploader_url,
2495 'channel_id': channel_id,
2496 'channel_url': channel_url,
2497 'upload_date': upload_date,
2498 'license': video_license,
2499 'creator': video_creator or artist,
2500 'title': video_title,
2501 'alt_title': video_alt_title or track,
2502 'thumbnails': thumbnails,
2503 'description': video_description,
2504 'categories': video_categories,
2505 'tags': video_tags,
2506 'subtitles': video_subtitles,
2507 'automatic_captions': automatic_captions,
2508 'duration': video_duration,
2509 'age_limit': 18 if age_gate else 0,
2510 'annotations': video_annotations,
2511 'chapters': chapters,
2512 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2513 'view_count': view_count,
2514 'like_count': like_count,
2515 'dislike_count': dislike_count,
2516 'average_rating': average_rating,
2517 'formats': formats,
2518 'is_live': is_live,
2519 'start_time': start_time,
2520 'end_time': end_time,
2521 'series': series,
2522 'season_number': season_number,
2523 'episode_number': episode_number,
2524 'track': track,
2525 'artist': artist,
2526 'album': album,
2527 'release_date': release_date,
2528 'release_year': release_year,
2529 'subscriber_count': subscriber_count,
2530 }
2531
2532
2533 class YoutubeTabIE(YoutubeBaseInfoExtractor):
2534 IE_DESC = 'YouTube.com tab'
2535 _VALID_URL = (r'''(?x)
2536 https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/(?:
2537 (?!(%s)([/#?]|$))|channel/|c/|user/|
2538 (?P<not_channel>playlist|watch)/?\?.*?\blist=)
2539 (?P<id>[^/?#&]+)''') % YoutubeBaseInfoExtractor._RESERVED_NAMES
2540 IE_NAME = 'youtube:tab'
2541
2542 _TESTS = [{
2543 # playlists, multipage
2544 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2545 'playlist_mincount': 94,
2546 'info_dict': {
2547 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2548 'title': 'Игорь Клейнер - Playlists',
2549 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2550 },
2551 }, {
2552 # playlists, multipage, different order
2553 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2554 'playlist_mincount': 94,
2555 'info_dict': {
2556 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2557 'title': 'Игорь Клейнер - Playlists',
2558 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2559 },
2560 }, {
2561 # playlists, singlepage
2562 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2563 'playlist_mincount': 4,
2564 'info_dict': {
2565 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2566 'title': 'ThirstForScience - Playlists',
2567 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2568 }
2569 }, {
2570 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2571 'only_matching': True,
2572 }, {
2573 # basic, single video playlist
2574 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2575 'info_dict': {
2576 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2577 'uploader': 'Sergey M.',
2578 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2579 'title': 'youtube-dl public playlist',
2580 },
2581 'playlist_count': 1,
2582 }, {
2583 # empty playlist
2584 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2585 'info_dict': {
2586 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2587 'uploader': 'Sergey M.',
2588 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2589 'title': 'youtube-dl empty playlist',
2590 },
2591 'playlist_count': 0,
2592 }, {
2593 # Home tab
2594 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2595 'info_dict': {
2596 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2597 'title': 'lex will - Home',
2598 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2599 },
2600 'playlist_mincount': 2,
2601 }, {
2602 # Videos tab
2603 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2604 'info_dict': {
2605 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2606 'title': 'lex will - Videos',
2607 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2608 },
2609 'playlist_mincount': 975,
2610 }, {
2611 # Videos tab, sorted by popular
2612 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2613 'info_dict': {
2614 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2615 'title': 'lex will - Videos',
2616 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2617 },
2618 'playlist_mincount': 199,
2619 }, {
2620 # Playlists tab
2621 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2622 'info_dict': {
2623 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2624 'title': 'lex will - Playlists',
2625 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2626 },
2627 'playlist_mincount': 17,
2628 }, {
2629 # Community tab
2630 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2631 'info_dict': {
2632 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2633 'title': 'lex will - Community',
2634 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2635 },
2636 'playlist_mincount': 18,
2637 }, {
2638 # Channels tab
2639 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2640 'info_dict': {
2641 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2642 'title': 'lex will - Channels',
2643 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2644 },
2645 'playlist_mincount': 138,
2646 }, {
2647 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2648 'only_matching': True,
2649 }, {
2650 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2651 'only_matching': True,
2652 }, {
2653 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2654 'only_matching': True,
2655 }, {
2656 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2657 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2658 'info_dict': {
2659 'title': '29C3: Not my department',
2660 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2661 'uploader': 'Christiaan008',
2662 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2663 },
2664 'playlist_count': 96,
2665 }, {
2666 'note': 'Large playlist',
2667 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2668 'info_dict': {
2669 'title': 'Uploads from Cauchemar',
2670 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2671 'uploader': 'Cauchemar',
2672 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2673 },
2674 'playlist_mincount': 1123,
2675 }, {
2676 # even larger playlist, 8832 videos
2677 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2678 'only_matching': True,
2679 }, {
2680 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2681 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2682 'info_dict': {
2683 'title': 'Uploads from Interstellar Movie',
2684 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2685 'uploader': 'Interstellar Movie',
2686 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2687 },
2688 'playlist_mincount': 21,
2689 }, {
2690 # https://github.com/ytdl-org/youtube-dl/issues/21844
2691 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2692 'info_dict': {
2693 'title': 'Data Analysis with Dr Mike Pound',
2694 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2695 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2696 'uploader': 'Computerphile',
2697 },
2698 'playlist_mincount': 11,
2699 }, {
2700 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2701 'only_matching': True,
2702 }, {
2703 # Playlist URL that does not actually serve a playlist
2704 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2705 'info_dict': {
2706 'id': 'FqZTN594JQw',
2707 'ext': 'webm',
2708 'title': "Smiley's People 01 detective, Adventure Series, Action",
2709 'uploader': 'STREEM',
2710 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2712 'upload_date': '20150526',
2713 'license': 'Standard YouTube License',
2714 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2715 'categories': ['People & Blogs'],
2716 'tags': list,
2717 'view_count': int,
2718 'like_count': int,
2719 'dislike_count': int,
2720 },
2721 'params': {
2722 'skip_download': True,
2723 },
2724 'skip': 'This video is not available.',
2725 'add_ie': [YoutubeIE.ie_key()],
2726 }, {
2727 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2728 'only_matching': True,
2729 }, {
2730 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2731 'only_matching': True,
2732 }, {
2733 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2734 'info_dict': {
2735 'id': '9Auq9mYxFEE',
2736 'ext': 'mp4',
2737 'title': 'Watch Sky News live',
2738 'uploader': 'Sky News',
2739 'uploader_id': 'skynews',
2740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2741 'upload_date': '20191102',
2742 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2743 'categories': ['News & Politics'],
2744 'tags': list,
2745 'like_count': int,
2746 'dislike_count': int,
2747 },
2748 'params': {
2749 'skip_download': True,
2750 },
2751 }, {
2752 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2753 'info_dict': {
2754 'id': 'a48o2S1cPoo',
2755 'ext': 'mp4',
2756 'title': 'The Young Turks - Live Main Show',
2757 'uploader': 'The Young Turks',
2758 'uploader_id': 'TheYoungTurks',
2759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2760 'upload_date': '20150715',
2761 'license': 'Standard YouTube License',
2762 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2763 'categories': ['News & Politics'],
2764 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2765 'like_count': int,
2766 'dislike_count': int,
2767 },
2768 'params': {
2769 'skip_download': True,
2770 },
2771 'only_matching': True,
2772 }, {
2773 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2774 'only_matching': True,
2775 }, {
2776 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2777 'only_matching': True,
2778 },
2779 # TODO
2780 # {
2781 # 'url': 'https://www.youtube.com/TheYoungTurks/live',
2782 # 'only_matching': True,
2783 # }
2784 ]
2785
2786 def _extract_channel_id(self, webpage):
2787 channel_id = self._html_search_meta(
2788 'channelId', webpage, 'channel id', default=None)
2789 if channel_id:
2790 return channel_id
2791 channel_url = self._html_search_meta(
2792 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2793 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2794 'twitter:app:url:googleplay'), webpage, 'channel url')
2795 return self._search_regex(
2796 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2797 channel_url, 'channel id')
2798
2799 @staticmethod
2800 def _extract_grid_item_renderer(item):
2801 for item_kind in ('Playlist', 'Video', 'Channel'):
2802 renderer = item.get('grid%sRenderer' % item_kind)
2803 if renderer:
2804 return renderer
2805
2806 def _extract_video(self, renderer):
2807 video_id = renderer.get('videoId')
2808 title = try_get(
2809 renderer,
2810 (lambda x: x['title']['runs'][0]['text'],
2811 lambda x: x['title']['simpleText']), compat_str)
2812 description = try_get(
2813 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2814 compat_str)
2815 duration = parse_duration(try_get(
2816 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2817 view_count_text = try_get(
2818 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2819 view_count = str_to_int(self._search_regex(
2820 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2821 'view count', default=None))
2822 uploader = try_get(
2823 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2824 return {
2825 '_type': 'url_transparent',
2826 'ie_key': YoutubeIE.ie_key(),
2827 'id': video_id,
2828 'url': video_id,
2829 'title': title,
2830 'description': description,
2831 'duration': duration,
2832 'view_count': view_count,
2833 'uploader': uploader,
2834 }
2835
2836 def _grid_entries(self, grid_renderer):
2837 for item in grid_renderer['items']:
2838 if not isinstance(item, dict):
2839 continue
2840 renderer = self._extract_grid_item_renderer(item)
2841 if not isinstance(renderer, dict):
2842 continue
2843 title = try_get(
2844 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2845 # playlist
2846 playlist_id = renderer.get('playlistId')
2847 if playlist_id:
2848 yield self.url_result(
2849 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2850 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2851 video_title=title)
2852 # video
2853 video_id = renderer.get('videoId')
2854 if video_id:
2855 yield self._extract_video(renderer)
2856 # channel
2857 channel_id = renderer.get('channelId')
2858 if channel_id:
2859 title = try_get(
2860 renderer, lambda x: x['title']['simpleText'], compat_str)
2861 yield self.url_result(
2862 'https://www.youtube.com/channel/%s' % channel_id,
2863 ie=YoutubeTabIE.ie_key(), video_title=title)
2864
2865 def _shelf_entries_trimmed(self, shelf_renderer):
2866 renderer = try_get(
2867 shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
2868 if not renderer:
2869 return
2870 # TODO: add support for nested playlists so each shelf is processed
2871 # as separate playlist
2872 # TODO: this includes only first N items
2873 for entry in self._grid_entries(renderer):
2874 yield entry
2875
2876 def _shelf_entries(self, shelf_renderer):
2877 ep = try_get(
2878 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2879 compat_str)
2880 shelf_url = urljoin('https://www.youtube.com', ep)
2881 if not shelf_url:
2882 return
2883 title = try_get(
2884 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2885 yield self.url_result(shelf_url, video_title=title)
2886
2887 def _playlist_entries(self, video_list_renderer):
2888 for content in video_list_renderer['contents']:
2889 if not isinstance(content, dict):
2890 continue
2891 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2892 if not isinstance(renderer, dict):
2893 continue
2894 video_id = renderer.get('videoId')
2895 if not video_id:
2896 continue
2897 yield self._extract_video(renderer)
2898
2899 def _itemSection_entries(self, item_sect_renderer):
2900 for content in item_sect_renderer['contents']:
2901 if not isinstance(content, dict):
2902 continue
2903 renderer = content.get('videoRenderer', {})
2904 if not isinstance(renderer, dict):
2905 continue
2906 video_id = renderer.get('videoId')
2907 if not video_id:
2908 continue
2909 yield self._extract_video(renderer)
2910
2911 def _rich_entries(self, rich_grid_renderer):
2912 renderer = try_get(
2913 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
2914 video_id = renderer.get('videoId')
2915 if not video_id:
2916 return
2917 yield self._extract_video(renderer)
2918
2919 def _video_entry(self, video_renderer):
2920 video_id = video_renderer.get('videoId')
2921 if video_id:
2922 return self._extract_video(video_renderer)
2923
2924 def _post_thread_entries(self, post_thread_renderer):
2925 post_renderer = try_get(
2926 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2927 if not post_renderer:
2928 return
2929 # video attachment
2930 video_renderer = try_get(
2931 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2932 video_id = None
2933 if video_renderer:
2934 entry = self._video_entry(video_renderer)
2935 if entry:
2936 yield entry
2937 # inline video links
2938 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2939 for run in runs:
2940 if not isinstance(run, dict):
2941 continue
2942 ep_url = try_get(
2943 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2944 if not ep_url:
2945 continue
2946 if not YoutubeIE.suitable(ep_url):
2947 continue
2948 ep_video_id = YoutubeIE._match_id(ep_url)
2949 if video_id == ep_video_id:
2950 continue
2951 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2952
2953 def _post_thread_continuation_entries(self, post_thread_continuation):
2954 contents = post_thread_continuation.get('contents')
2955 if not isinstance(contents, list):
2956 return
2957 for content in contents:
2958 renderer = content.get('backstagePostThreadRenderer')
2959 if not isinstance(renderer, dict):
2960 continue
2961 for entry in self._post_thread_entries(renderer):
2962 yield entry
2963
2964 @staticmethod
2965 def _extract_next_continuation_data(renderer):
2966 next_continuation = try_get(
2967 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2968 if not next_continuation:
2969 return
2970 continuation = next_continuation.get('continuation')
2971 if not continuation:
2972 return
2973 ctp = next_continuation.get('clickTrackingParams')
2974 return {
2975 'ctoken': continuation,
2976 'continuation': continuation,
2977 'itct': ctp,
2978 }
2979
2980 @classmethod
2981 def _extract_continuation(cls, renderer):
2982 next_continuation = cls._extract_next_continuation_data(renderer)
2983 if next_continuation:
2984 return next_continuation
2985 contents = renderer.get('contents')
2986 if not isinstance(contents, list):
2987 return
2988 for content in contents:
2989 if not isinstance(content, dict):
2990 continue
2991 continuation_ep = try_get(
2992 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
2993 dict)
2994 if not continuation_ep:
2995 continue
2996 continuation = try_get(
2997 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
2998 if not continuation:
2999 continue
3000 ctp = continuation_ep.get('clickTrackingParams')
3001 if not ctp:
3002 continue
3003 return {
3004 'ctoken': continuation,
3005 'continuation': continuation,
3006 'itct': ctp,
3007 }
3008
    def _entries(self, tab, identity_token):
        """Yield all entries of a tab's content, following continuations.

        ``tab`` is the selected tab's ``content`` dict. The first page is
        parsed from the embedded renderer; subsequent pages are fetched
        from the browse_ajax endpoint until no continuation token is
        returned (or ``_MAX_PAGES``, if a subclass defines it, is hit).
        """

        def extract_entries(parent_renderer):
            # Dispatch every section/item of the renderer to the matching
            # _*_entries helper, recording where to continue from in
            # continuation_list[0] (a 1-element list; see note below).
            slr_contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for slr_content in slr_contents:
                if not isinstance(slr_content, dict):
                    continue
                is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Rich grids (e.g. channel /videos) wrap each video in
                    # a richItemRenderer; their continuation lives on the
                    # parent renderer rather than on the item.
                    renderer = slr_content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        for entry in self._shelf_entries(renderer):
                            yield entry
                        # Shelves continue from the parent, not the shelf.
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry
                # Fall back to the section's own continuation, then to the
                # parent's, keeping the first one found.
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # 1-element list used as a writable cell because Python 2 has no
        # `nonlocal` statement.
        continuation_list = [None]
        parent_renderer = (
            try_get(tab, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
        if parent_renderer:
            for entry in extract_entries(parent_renderer):
                yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        for page_num in itertools.count(1):
            if not continuation:
                break
            if hasattr(self, '_MAX_PAGES') and page_num > self._MAX_PAGES:
                break
            browse = self._download_json(
                'https://www.youtube.com/browse_ajax', None,
                'Downloading page %d' % page_num,
                headers=headers, query=continuation, fatal=False)
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Older ("legacy") continuation format.
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')
                if continuation_renderer:
                    # A full section list: re-run the dispatcher and pick
                    # up its continuation from the shared cell.
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # Newer continuation format (onResponseReceivedActions).
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer')
                if renderer:
                    # Wrap the flat item list so the existing helpers can
                    # consume it like a regular renderer.
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
                renderer = continuation_item.get('itemSectionRenderer')
                if renderer:
                    for entry in self._itemSection_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    continue
            break
3142
3143 @staticmethod
3144 def _extract_selected_tab(tabs):
3145 for tab in tabs:
3146 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3147 return tab['tabRenderer']
3148 else:
3149 raise ExtractorError('Unable to find selected tab')
3150
3151 @staticmethod
3152 def _extract_uploader(data):
3153 uploader = {}
3154 sidebar_renderer = try_get(
3155 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3156 if sidebar_renderer:
3157 for item in sidebar_renderer:
3158 if not isinstance(item, dict):
3159 continue
3160 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3161 if not isinstance(renderer, dict):
3162 continue
3163 owner = try_get(
3164 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3165 if owner:
3166 uploader['uploader'] = owner.get('text')
3167 uploader['uploader_id'] = try_get(
3168 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3169 uploader['uploader_url'] = urljoin(
3170 'https://www.youtube.com/',
3171 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3172 return uploader
3173
    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Build a playlist result from a channel/playlist tab page.

        Title, description and playlist id are taken from
        channelMetadataRenderer (channel pages) or, when present,
        overridden by playlistMetadataRenderer (playlist pages). Returns
        None when neither renderer yields a playlist id.
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = None
        if renderer:
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            title = channel_title or item_id
            # e.g. "lex will - Videos"
            if tab_title:
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        # Playlist metadata wins over channel metadata when both exist.
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
        if renderer:
            title = renderer.get('title')
            description = None
            playlist_id = item_id
        if playlist_id is None:
            return None
        playlist = self.playlist_result(
            self._entries(selected_tab['content'], identity_token),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        # Merge uploader name/id/url from the sidebar, when available.
        playlist.update(self._extract_uploader(data))
        return playlist
3201
3202 def _extract_from_playlist(self, item_id, data, playlist):
3203 title = playlist.get('title') or try_get(
3204 data, lambda x: x['titleText']['simpleText'], compat_str)
3205 playlist_id = playlist.get('playlistId') or item_id
3206 return self.playlist_result(
3207 self._playlist_entries(playlist), playlist_id=playlist_id,
3208 playlist_title=title)
3209
    def _real_extract(self, url):
        """Dispatch a channel/user/playlist/watch URL to the right path:
        tabbed page -> _extract_from_tabs, watch page with playlist ->
        _extract_from_playlist, otherwise fall back to single-video
        extraction via YoutubeIE."""
        item_id = self._match_id(url)
        # Normalize the host so regexes and requests behave consistently.
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # A bare channel/user URL (no sub-path) defaults to its /videos tab.
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None:
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/home" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            # --no-playlist short-circuits to the single video.
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        # Needed to page through results for logged-in accounts.
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3252
3253
3254 class YoutubePlaylistIE(InfoExtractor):
3255 IE_DESC = 'YouTube.com playlists'
3256 _VALID_URL = r'''(?x)(?:
3257 (?:https?://)?
3258 (?:\w+\.)?
3259 (?:
3260 (?:
3261 youtube(?:kids)?\.com|
3262 invidio\.us|
3263 youtu\.be
3264 )
3265 /.*?\?.*?\blist=
3266 )?
3267 (?P<id>%(playlist_id)s)
3268 )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
3269 IE_NAME = 'youtube:playlist'
3270 _TESTS = [{
3271 'note': 'issue #673',
3272 'url': 'PLBB231211A4F62143',
3273 'info_dict': {
3274 'title': '[OLD]Team Fortress 2 (Class-based LP)',
3275 'id': 'PLBB231211A4F62143',
3276 'uploader': 'Wickydoo',
3277 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
3278 },
3279 'playlist_mincount': 29,
3280 }, {
3281 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3282 'info_dict': {
3283 'title': 'YDL_safe_search',
3284 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3285 },
3286 'playlist_count': 2,
3287 'skip': 'This playlist is private',
3288 }, {
3289 'note': 'embedded',
3290 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3291 'playlist_count': 4,
3292 'info_dict': {
3293 'title': 'JODA15',
3294 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3295 'uploader': 'milan',
3296 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
3297 }
3298 }, {
3299 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3300 'playlist_mincount': 982,
3301 'info_dict': {
3302 'title': '2018 Chinese New Singles (11/6 updated)',
3303 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3304 'uploader': 'LBK',
3305 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
3306 }
3307 }, {
3308 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
3309 'info_dict': {
3310 'id': 'yeWKywCrFtk',
3311 'ext': 'mp4',
3312 'title': 'Small Scale Baler and Braiding Rugs',
3313 'uploader': 'Backus-Page House Museum',
3314 'uploader_id': 'backuspagemuseum',
3315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
3316 'upload_date': '20161008',
3317 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
3318 'categories': ['Nonprofits & Activism'],
3319 'tags': list,
3320 'like_count': int,
3321 'dislike_count': int,
3322 },
3323 'params': {
3324 'noplaylist': True,
3325 'skip_download': True,
3326 },
3327 }, {
3328 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
3329 'only_matching': True,
3330 }, {
3331 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
3332 'only_matching': True,
3333 }, {
3334 # music album playlist
3335 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
3336 'only_matching': True,
3337 }]
3338
3339 @classmethod
3340 def suitable(cls, url):
3341 return False if YoutubeTabIE.suitable(url) else super(
3342 YoutubePlaylistIE, cls).suitable(url)
3343
3344 def _real_extract(self, url):
3345 playlist_id = self._match_id(url)
3346 qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
3347 if not qs:
3348 qs = {'list': playlist_id}
3349 return self.url_result(
3350 update_url_query('https://www.youtube.com/playlist', qs),
3351 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3352
3353
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the internal 'ytuser:<name>' scheme to the corresponding
    /user/ channel URL and hands it to YoutubeTabIE."""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3366
3367
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor backing the 'ytsearch<N>:<query>' pseudo-URL scheme."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra encoded filter/sort params sent with the query; subclasses override.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* results for *query* as url_transparent entries,
        paging through YouTube's InnerTube search API via continuation tokens."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            # fatal=False above: a failed/empty response ends pagination quietly.
            if not search:
                break
            # The first page nests results under sectionListRenderer;
            # continuation pages deliver them via onResponseReceivedCommands.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Non-video items (channels, playlists, ads, ...) have no
                # videoRenderer and are skipped.
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Drop all whitespace, then take the leading run of digits.
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
            # n may be float('inf'), in which case this never triggers and
            # pagination only stops when the continuation token runs out.
            if total == n:
                return
            # Token for the next page; absent on the last page of results.
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3456
3457
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant ('ytsearchdate') that orders results newest-first."""
    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Encoded InnerTube filter selecting sort-by-upload-date.
    _SEARCH_PARAMS = 'CAI%3D'
3463
3464
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handles full youtube.com/results?search_query=... URLs by reusing the
    paged search machinery of YoutubeSearchIE."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # URLs are matched directly; no 'ytsearch'-style key prefix needed.
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees at least one of these keys is present.
        search_terms = params.get('search_query') or params.get('q')
        # Pass any 'sp' filter parameter straight through to the API.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_terms[0], self._MAX_RESULTS)
3490
3491
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived dynamically so subclasses only need to set _FEED_NAME.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account; authenticate before extraction.
        self._login()

    def _shelf_entries(self, shelf_renderer):
        """Yield entries from a grid-style shelf; other shelf layouts are skipped."""
        grid = try_get(shelf_renderer, lambda x: x['content']['gridRenderer'], dict)
        if not grid:
            return
        for entry in self._grid_entries(grid):
            yield entry

    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Build the playlist result from the currently selected feed tab."""
        active_tab = self._extract_selected_tab(tabs)
        entries = self._entries(active_tab['content'], identity_token)
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)

    def _real_extract(self, url):
        feed_id = self._FEED_NAME
        # The incoming URL may be a shortcut (e.g. ':ythistory'); always fetch
        # the canonical feed page instead.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        webpage = self._download_webpage(feed_url, feed_id)
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(feed_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if not tabs:
            # Failed to recognize
            raise ExtractorError('Unable to recognize feed page')
        return self._extract_from_tabs(feed_id, webpage, data, tabs, identity_token)
3535
3536
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the watch-later shortcuts onto the special 'WL' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater|WL'

    _TESTS = [{
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3553
3554
class YoutubeFavouritesIE(InfoExtractor):
    """Maps the ':ytfav' shortcuts onto the special 'LL' (liked videos) playlist."""
    IE_NAME = 'youtube:favourites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?|LL'

    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3568
3569
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Recommended-videos feed (also matches the bare youtube.com home page)."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
3575
3576
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed at /feed/subscriptions (also ':ytsubs' shortcuts)."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
3582
3583
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed at /feed/history (also the ':ythistory' shortcut)."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3589
3590
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs whose video ID is missing — typically
    because an unquoted '&' was eaten by the user's shell — and raises a
    helpful, expected error instead of failing obscurely."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex: a watch URL with only a non-'v' parameter (or none at
    # all), or an attribution_link without its 'u' parameter. '$' ensures we
    # only claim URLs that truly carry no video ID.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: there is nothing to extract from a truncated URL.
        # expected=True suppresses the bug-report boilerplate.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3638
3639
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose 11-character video ID was cut short and
    raises a clear, expected error instead of a confusing download failure."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # 1-10 ID characters (a full ID has 11), anchored so longer IDs don't match.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
3655
3656
# Do YouTube show URLs even exist anymore? I couldn't find any.
3658 r'''
3659 class YoutubeShowIE(YoutubeTabIE):
3660 IE_DESC = 'YouTube.com (multi-season) shows'
3661 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3662 IE_NAME = 'youtube:show'
3663 _TESTS = [{
3664 'url': 'https://www.youtube.com/show/airdisasters',
3665 'playlist_mincount': 5,
3666 'info_dict': {
3667 'id': 'airdisasters',
3668 'title': 'Air Disasters',
3669 }
3670 }]
3671
3672 def _real_extract(self, url):
3673 playlist_id = self._match_id(url)
3674 return super(YoutubeShowIE, self)._real_extract(
3675 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3676 '''