]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/youtube.py
Readme changes
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_kwargs,
20 compat_parse_qs,
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
25 compat_urlparse,
26 compat_str,
27 )
28 from ..utils import (
29 bool_or_none,
30 clean_html,
31 error_to_compat_str,
32 ExtractorError,
33 float_or_none,
34 get_element_by_id,
35 int_or_none,
36 mimetype2ext,
37 parse_codecs,
38 parse_count,
39 parse_duration,
40 remove_quotes,
41 remove_start,
42 smuggle_url,
43 str_or_none,
44 str_to_int,
45 try_get,
46 unescapeHTML,
47 unified_strdate,
48 unsmuggle_url,
49 update_url_query,
50 uppercase_escape,
51 url_or_none,
52 urlencode_postdata,
53 urljoin,
54 )
55
56
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints used by _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Internal sign-in RPC endpoints (account lookup, password challenge).
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Alternation of URL path components that can never be a channel/user name.
    _RESERVED_NAMES = (
        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|oops|'
        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs by their known prefixes, plus the special
    # mixes/lists (RDMM, WL, LL, LM).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # Headers identifying requests as coming from the desktop web client.
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Set the PREF cookie to force the English (hl=en) interface."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn an iterable of video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        NOTE(review): the flow mimics the Google web sign-in RPCs; the list
        payloads and positional indexing below mirror the (undocumented)
        wire format and are fragile by nature.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False: # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        # NOTE: returns None here (not False) — callers only check falsiness.
        if login_page is False:
            return

        # Hidden form fields from the login page are carried into every RPC.
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST a sign-in RPC. The JSON response is prefixed with an
            # anti-XSSI guard, which transform_source strips (everything
            # before the first '[').
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account by username/email.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # The opaque account token at [0][2] identifies the user in later RPCs.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: answer the password challenge.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        # NOTE: returns None here (not False) — callers only check falsiness.
        if challenge_results is False:
            return

        # A list at [0][5] signals a login error (e.g. wrong password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # Step 3 (optional): handle an additional challenge (TFA etc.).
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token needed to address the TFA submission endpoint.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Strip the "G-" prefix Google prepends to SMS codes.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                # Same error-slot convention as the password challenge above.
                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges we cannot solve automatically: tell the user to
                # resolve them in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Step 4: visit the CheckCookie URL to finalize the session cookies.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through myaccount.google.com.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Wrap InfoExtractor._download_webpage_handle, copying the query dict
        so callers' dicts are never mutated."""
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON object embedded in a watch
        page; return None if it cannot be found or parsed."""
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        """Set the language cookie and attempt login before extraction."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal client context sent with every innertube API call.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Matches the ytInitialData assignment in either of its two page forms.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

    def _call_api(self, ep, query, video_id):
        """POST to the youtubei (innertube) API endpoint *ep*, merging *query*
        into the default client context, and return the parsed JSON.

        NOTE(review): the 'key' value is presumably the public web-client API
        key (it is embedded in YouTube's own pages) — confirm before reuse.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Like _get_yt_initial_data but fatal: raise if ytInitialData is
        missing or unparseable. Tries the newline-terminated form first."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
331
332 class YoutubeIE(YoutubeBaseInfoExtractor):
333 IE_DESC = 'YouTube.com'
334 _VALID_URL = r"""(?x)^
335 (
336 (?:https?://|//) # http(s):// or protocol-independent URL
337 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
338 (?:www\.)?deturl\.com/www\.youtube\.com/|
339 (?:www\.)?pwnyoutube\.com/|
340 (?:www\.)?hooktube\.com/|
341 (?:www\.)?yourepeat\.com/|
342 tube\.majestyc\.net/|
343 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
344 (?:(?:www|dev)\.)?invidio\.us/|
345 (?:(?:www|no)\.)?invidiou\.sh/|
346 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
347 (?:www\.)?invidious\.kabi\.tk/|
348 (?:www\.)?invidious\.13ad\.de/|
349 (?:www\.)?invidious\.mastodon\.host/|
350 (?:www\.)?invidious\.nixnet\.xyz/|
351 (?:www\.)?invidious\.drycat\.fr/|
352 (?:www\.)?tube\.poal\.co/|
353 (?:www\.)?vid\.wxzm\.sx/|
354 (?:www\.)?yewtu\.be/|
355 (?:www\.)?yt\.elukerio\.org/|
356 (?:www\.)?yt\.lelux\.fi/|
357 (?:www\.)?invidious\.ggc-project\.de/|
358 (?:www\.)?yt\.maisputain\.ovh/|
359 (?:www\.)?invidious\.13ad\.de/|
360 (?:www\.)?invidious\.toot\.koeln/|
361 (?:www\.)?invidious\.fdn\.fr/|
362 (?:www\.)?watch\.nettohikari\.com/|
363 (?:www\.)?kgg2m7yk5aybusll\.onion/|
364 (?:www\.)?qklhadlycap4cnod\.onion/|
365 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
366 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
367 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
368 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
369 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
370 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
371 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
372 (?:.*?\#/)? # handle anchor (#/) redirect urls
373 (?: # the various things that can precede the ID:
374 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
375 |(?: # or the v= param in all its forms
376 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
377 (?:\?|\#!?) # the params delimiter ? or # or #!
378 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
379 v=
380 )
381 ))
382 |(?:
383 youtu\.be| # just youtu.be/xxxx
384 vid\.plus| # or vid.plus/xxxx
385 zwearz\.com/watch| # or zwearz.com/watch/xxxx
386 )/
387 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
388 )
389 )? # all until now is optional -> you can pass the naked ID
390 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
391 (?!.*?\blist=
392 (?:
393 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
394 WL # WL are handled by the watch later IE
395 )
396 )
397 (?(1).+)? # if we found the ID, everything can follow
398 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
399 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
400 _PLAYER_INFO_RE = (
401 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
402 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
403 )
404 _formats = {
405 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
406 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
407 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
408 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
409 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
410 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
411 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
412 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
413 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
414 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
415 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
416 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
417 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
418 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
419 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
420 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
421 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
422 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
423
424
425 # 3D videos
426 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
427 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
428 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
429 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
430 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
431 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
432 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
433
434 # Apple HTTP Live Streaming
435 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
436 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
437 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
438 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
439 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
440 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
441 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
442 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
443
444 # DASH mp4 video
445 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
446 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
447 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
448 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
449 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
450 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
451 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
452 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
453 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
454 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
455 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
456 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
457
458 # Dash mp4 audio
459 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
460 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
461 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
462 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
463 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
464 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
465 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
466
467 # Dash webm
468 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
469 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
470 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
471 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
472 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
473 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
474 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
475 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
476 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
477 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
478 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
479 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
480 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
481 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
482 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
483 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
484 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
485 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
486 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
487 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
488 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
489 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
490
491 # Dash webm audio
492 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
493 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
494
495 # Dash webm audio with opus inside
496 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
497 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
498 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
499
500 # RTMP (unnamed)
501 '_rtmp': {'protocol': 'rtmp'},
502
503 # av01 video only formats sometimes served with "unknown" codecs
504 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
505 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
506 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
507 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
508 }
509 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') # TODO 'json3' raising issues with automatic captions
510
511 _GEO_BYPASS = False
512
513 IE_NAME = 'youtube'
514 _TESTS = [
515 {
516 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
517 'info_dict': {
518 'id': 'BaW_jenozKc',
519 'ext': 'mp4',
520 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
521 'uploader': 'Philipp Hagemeister',
522 'uploader_id': 'phihag',
523 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
524 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
525 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
526 'upload_date': '20121002',
527 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
528 'categories': ['Science & Technology'],
529 'tags': ['youtube-dl'],
530 'duration': 10,
531 'view_count': int,
532 'like_count': int,
533 'dislike_count': int,
534 'start_time': 1,
535 'end_time': 9,
536 }
537 },
538 {
539 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
540 'note': 'Embed-only video (#1746)',
541 'info_dict': {
542 'id': 'yZIXLfi8CZQ',
543 'ext': 'mp4',
544 'upload_date': '20120608',
545 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
546 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
547 'uploader': 'SET India',
548 'uploader_id': 'setindia',
549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
550 'age_limit': 18,
551 }
552 },
553 {
554 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
555 'note': 'Use the first video ID in the URL',
556 'info_dict': {
557 'id': 'BaW_jenozKc',
558 'ext': 'mp4',
559 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
560 'uploader': 'Philipp Hagemeister',
561 'uploader_id': 'phihag',
562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
563 'upload_date': '20121002',
564 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
565 'categories': ['Science & Technology'],
566 'tags': ['youtube-dl'],
567 'duration': 10,
568 'view_count': int,
569 'like_count': int,
570 'dislike_count': int,
571 },
572 'params': {
573 'skip_download': True,
574 },
575 },
576 {
577 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
578 'note': '256k DASH audio (format 141) via DASH manifest',
579 'info_dict': {
580 'id': 'a9LDPn-MO4I',
581 'ext': 'm4a',
582 'upload_date': '20121002',
583 'uploader_id': '8KVIDEO',
584 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
585 'description': '',
586 'uploader': '8KVIDEO',
587 'title': 'UHDTV TEST 8K VIDEO.mp4'
588 },
589 'params': {
590 'youtube_include_dash_manifest': True,
591 'format': '141',
592 },
593 'skip': 'format 141 not served anymore',
594 },
595 # DASH manifest with encrypted signature
596 {
597 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
598 'info_dict': {
599 'id': 'IB3lcPjvWLA',
600 'ext': 'm4a',
601 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
602 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
603 'duration': 244,
604 'uploader': 'AfrojackVEVO',
605 'uploader_id': 'AfrojackVEVO',
606 'upload_date': '20131011',
607 },
608 'params': {
609 'youtube_include_dash_manifest': True,
610 'format': '141/bestaudio[ext=m4a]',
611 },
612 },
613 # Controversy video
614 {
615 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
616 'info_dict': {
617 'id': 'T4XJQO3qol8',
618 'ext': 'mp4',
619 'duration': 219,
620 'upload_date': '20100909',
621 'uploader': 'Amazing Atheist',
622 'uploader_id': 'TheAmazingAtheist',
623 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
624 'title': 'Burning Everyone\'s Koran',
625 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
626 }
627 },
628 # Normal age-gate video (embed allowed)
629 {
630 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
631 'info_dict': {
632 'id': 'HtVdAasjOgU',
633 'ext': 'mp4',
634 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
635 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
636 'duration': 142,
637 'uploader': 'The Witcher',
638 'uploader_id': 'WitcherGame',
639 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
640 'upload_date': '20140605',
641 'age_limit': 18,
642 },
643 },
644 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
645 # YouTube Red ad is not captured for creator
646 {
647 'url': '__2ABJjxzNo',
648 'info_dict': {
649 'id': '__2ABJjxzNo',
650 'ext': 'mp4',
651 'duration': 266,
652 'upload_date': '20100430',
653 'uploader_id': 'deadmau5',
654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
655 'creator': 'Dada Life, deadmau5',
656 'description': 'md5:12c56784b8032162bb936a5f76d55360',
657 'uploader': 'deadmau5',
658 'title': 'Deadmau5 - Some Chords (HD)',
659 'alt_title': 'This Machine Kills Some Chords',
660 },
661 'expected_warnings': [
662 'DASH manifest missing',
663 ]
664 },
665 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
666 {
667 'url': 'lqQg6PlCWgI',
668 'info_dict': {
669 'id': 'lqQg6PlCWgI',
670 'ext': 'mp4',
671 'duration': 6085,
672 'upload_date': '20150827',
673 'uploader_id': 'olympic',
674 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
675 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
676 'uploader': 'Olympic',
677 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
678 },
679 'params': {
680 'skip_download': 'requires avconv',
681 }
682 },
683 # Non-square pixels
684 {
685 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
686 'info_dict': {
687 'id': '_b-2C3KPAM0',
688 'ext': 'mp4',
689 'stretched_ratio': 16 / 9.,
690 'duration': 85,
691 'upload_date': '20110310',
692 'uploader_id': 'AllenMeow',
693 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
694 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
695 'uploader': '孫ᄋᄅ',
696 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
697 },
698 },
699 # url_encoded_fmt_stream_map is empty string
700 {
701 'url': 'qEJwOuvDf7I',
702 'info_dict': {
703 'id': 'qEJwOuvDf7I',
704 'ext': 'webm',
705 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
706 'description': '',
707 'upload_date': '20150404',
708 'uploader_id': 'spbelect',
709 'uploader': 'Наблюдатели Петербурга',
710 },
711 'params': {
712 'skip_download': 'requires avconv',
713 },
714 'skip': 'This live event has ended.',
715 },
716 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
717 {
718 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
719 'info_dict': {
720 'id': 'FIl7x6_3R5Y',
721 'ext': 'webm',
722 'title': 'md5:7b81415841e02ecd4313668cde88737a',
723 'description': 'md5:116377fd2963b81ec4ce64b542173306',
724 'duration': 220,
725 'upload_date': '20150625',
726 'uploader_id': 'dorappi2000',
727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
728 'uploader': 'dorappi2000',
729 'formats': 'mincount:31',
730 },
731 'skip': 'not actual anymore',
732 },
733 # DASH manifest with segment_list
734 {
735 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
736 'md5': '8ce563a1d667b599d21064e982ab9e31',
737 'info_dict': {
738 'id': 'CsmdDsKjzN8',
739 'ext': 'mp4',
740 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
741 'uploader': 'Airtek',
742 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
743 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
744 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
745 },
746 'params': {
747 'youtube_include_dash_manifest': True,
748 'format': '135', # bestvideo
749 },
750 'skip': 'This live event has ended.',
751 },
752 {
753 # Multifeed videos (multiple cameras), URL is for Main Camera
754 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
755 'info_dict': {
756 'id': 'jqWvoWXjCVs',
757 'title': 'teamPGP: Rocket League Noob Stream',
758 'description': 'md5:dc7872fb300e143831327f1bae3af010',
759 },
760 'playlist': [{
761 'info_dict': {
762 'id': 'jqWvoWXjCVs',
763 'ext': 'mp4',
764 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
765 'description': 'md5:dc7872fb300e143831327f1bae3af010',
766 'duration': 7335,
767 'upload_date': '20150721',
768 'uploader': 'Beer Games Beer',
769 'uploader_id': 'beergamesbeer',
770 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
771 'license': 'Standard YouTube License',
772 },
773 }, {
774 'info_dict': {
775 'id': '6h8e8xoXJzg',
776 'ext': 'mp4',
777 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
778 'description': 'md5:dc7872fb300e143831327f1bae3af010',
779 'duration': 7337,
780 'upload_date': '20150721',
781 'uploader': 'Beer Games Beer',
782 'uploader_id': 'beergamesbeer',
783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
784 'license': 'Standard YouTube License',
785 },
786 }, {
787 'info_dict': {
788 'id': 'PUOgX5z9xZw',
789 'ext': 'mp4',
790 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
791 'description': 'md5:dc7872fb300e143831327f1bae3af010',
792 'duration': 7337,
793 'upload_date': '20150721',
794 'uploader': 'Beer Games Beer',
795 'uploader_id': 'beergamesbeer',
796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
797 'license': 'Standard YouTube License',
798 },
799 }, {
800 'info_dict': {
801 'id': 'teuwxikvS5k',
802 'ext': 'mp4',
803 'title': 'teamPGP: Rocket League Noob Stream (zim)',
804 'description': 'md5:dc7872fb300e143831327f1bae3af010',
805 'duration': 7334,
806 'upload_date': '20150721',
807 'uploader': 'Beer Games Beer',
808 'uploader_id': 'beergamesbeer',
809 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
810 'license': 'Standard YouTube License',
811 },
812 }],
813 'params': {
814 'skip_download': True,
815 },
816 'skip': 'This video is not available.',
817 },
818 {
819 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
820 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
821 'info_dict': {
822 'id': 'gVfLd0zydlo',
823 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
824 },
825 'playlist_count': 2,
826 'skip': 'Not multifeed anymore',
827 },
828 {
829 'url': 'https://vid.plus/FlRa-iH7PGw',
830 'only_matching': True,
831 },
832 {
833 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
834 'only_matching': True,
835 },
836 {
837 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
838 # Also tests cut-off URL expansion in video description (see
839 # https://github.com/ytdl-org/youtube-dl/issues/1892,
840 # https://github.com/ytdl-org/youtube-dl/issues/8164)
841 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
842 'info_dict': {
843 'id': 'lsguqyKfVQg',
844 'ext': 'mp4',
845 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
846 'alt_title': 'Dark Walk - Position Music',
847 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
848 'duration': 133,
849 'upload_date': '20151119',
850 'uploader_id': 'IronSoulElf',
851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
852 'uploader': 'IronSoulElf',
853 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
854 'track': 'Dark Walk - Position Music',
855 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
856 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
857 },
858 'params': {
859 'skip_download': True,
860 },
861 },
862 {
863 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
864 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
865 'only_matching': True,
866 },
867 {
868 # Video with yt:stretch=17:0
869 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
870 'info_dict': {
871 'id': 'Q39EVAstoRM',
872 'ext': 'mp4',
873 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
874 'description': 'md5:ee18a25c350637c8faff806845bddee9',
875 'upload_date': '20151107',
876 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
877 'uploader': 'CH GAMER DROID',
878 },
879 'params': {
880 'skip_download': True,
881 },
882 'skip': 'This video does not exist.',
883 },
884 {
885 # Video licensed under Creative Commons
886 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
887 'info_dict': {
888 'id': 'M4gD1WSo5mA',
889 'ext': 'mp4',
890 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
891 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
892 'duration': 721,
893 'upload_date': '20150127',
894 'uploader_id': 'BerkmanCenter',
895 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
896 'uploader': 'The Berkman Klein Center for Internet & Society',
897 'license': 'Creative Commons Attribution license (reuse allowed)',
898 },
899 'params': {
900 'skip_download': True,
901 },
902 },
903 {
904 # Channel-like uploader_url
905 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
906 'info_dict': {
907 'id': 'eQcmzGIKrzg',
908 'ext': 'mp4',
909 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
910 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
911 'duration': 4060,
912 'upload_date': '20151119',
913 'uploader': 'Bernie Sanders',
914 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
916 'license': 'Creative Commons Attribution license (reuse allowed)',
917 },
918 'params': {
919 'skip_download': True,
920 },
921 },
922 {
923 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
924 'only_matching': True,
925 },
926 {
927 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
928 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
929 'only_matching': True,
930 },
931 {
932 # Rental video preview
933 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
934 'info_dict': {
935 'id': 'uGpuVWrhIzE',
936 'ext': 'mp4',
937 'title': 'Piku - Trailer',
938 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
939 'upload_date': '20150811',
940 'uploader': 'FlixMatrix',
941 'uploader_id': 'FlixMatrixKaravan',
942 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
943 'license': 'Standard YouTube License',
944 },
945 'params': {
946 'skip_download': True,
947 },
948 'skip': 'This video is not available.',
949 },
950 {
951 # YouTube Red video with episode data
952 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
953 'info_dict': {
954 'id': 'iqKdEhx-dD4',
955 'ext': 'mp4',
956 'title': 'Isolation - Mind Field (Ep 1)',
957 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
958 'duration': 2085,
959 'upload_date': '20170118',
960 'uploader': 'Vsauce',
961 'uploader_id': 'Vsauce',
962 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
963 'series': 'Mind Field',
964 'season_number': 1,
965 'episode_number': 1,
966 },
967 'params': {
968 'skip_download': True,
969 },
970 'expected_warnings': [
971 'Skipping DASH manifest',
972 ],
973 },
974 {
975 # The following content has been identified by the YouTube community
976 # as inappropriate or offensive to some audiences.
977 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
978 'info_dict': {
979 'id': '6SJNVb0GnPI',
980 'ext': 'mp4',
981 'title': 'Race Differences in Intelligence',
982 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
983 'duration': 965,
984 'upload_date': '20140124',
985 'uploader': 'New Century Foundation',
986 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
987 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
988 },
989 'params': {
990 'skip_download': True,
991 },
992 },
993 {
994 # itag 212
995 'url': '1t24XAntNCY',
996 'only_matching': True,
997 },
998 {
999 # geo restricted to JP
1000 'url': 'sJL6WA-aGkQ',
1001 'only_matching': True,
1002 },
1003 {
1004 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1005 'only_matching': True,
1006 },
1007 {
1008 # DRM protected
1009 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1010 'only_matching': True,
1011 },
1012 {
1013 # Video with unsupported adaptive stream type formats
1014 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1015 'info_dict': {
1016 'id': 'Z4Vy8R84T1U',
1017 'ext': 'mp4',
1018 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1019 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1020 'duration': 433,
1021 'upload_date': '20130923',
1022 'uploader': 'Amelia Putri Harwita',
1023 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1025 'formats': 'maxcount:10',
1026 },
1027 'params': {
1028 'skip_download': True,
1029 'youtube_include_dash_manifest': False,
1030 },
1031 'skip': 'not actual anymore',
1032 },
1033 {
1034 # Youtube Music Auto-generated description
1035 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1036 'info_dict': {
1037 'id': 'MgNrAu2pzNs',
1038 'ext': 'mp4',
1039 'title': 'Voyeur Girl',
1040 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1041 'upload_date': '20190312',
1042 'uploader': 'Stephen - Topic',
1043 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1044 'artist': 'Stephen',
1045 'track': 'Voyeur Girl',
1046 'album': 'it\'s too much love to know my dear',
1047 'release_date': '20190313',
1048 'release_year': 2019,
1049 },
1050 'params': {
1051 'skip_download': True,
1052 },
1053 },
1054 {
1055 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1056 'only_matching': True,
1057 },
1058 {
1059 # invalid -> valid video id redirection
1060 'url': 'DJztXj2GPfl',
1061 'info_dict': {
1062 'id': 'DJztXj2GPfk',
1063 'ext': 'mp4',
1064 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1065 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1066 'upload_date': '20090125',
1067 'uploader': 'Prochorowka',
1068 'uploader_id': 'Prochorowka',
1069 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1070 'artist': 'Panjabi MC',
1071 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1072 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1073 },
1074 'params': {
1075 'skip_download': True,
1076 },
1077 },
1078 {
1079 # empty description results in an empty string
1080 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1081 'info_dict': {
1082 'id': 'x41yOUIvK2k',
1083 'ext': 'mp4',
1084 'title': 'IMG 3456',
1085 'description': '',
1086 'upload_date': '20170613',
1087 'uploader_id': 'ElevageOrVert',
1088 'uploader': 'ElevageOrVert',
1089 },
1090 'params': {
1091 'skip_download': True,
1092 },
1093 },
1094 {
1095 # with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
1096 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1097 'info_dict': {
1098 'id': 'CHqg6qOn4no',
1099 'ext': 'mp4',
1100 'title': 'Part 77 Sort a list of simple types in c#',
1101 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1102 'upload_date': '20130831',
1103 'uploader_id': 'kudvenkat',
1104 'uploader': 'kudvenkat',
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
1109 },
1110 ]
1111
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and create the per-instance cache that
        maps (player_url, signature cache id) to deciphering functions
        (filled lazily by _decrypt_signature)."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Lazily populated by _decrypt_signature to avoid re-downloading and
        # re-parsing the same player for every format of a video.
        self._player_cache = {}
1115
1116 def report_video_info_webpage_download(self, video_id):
1117 """Report attempt to download video info webpage."""
1118 self.to_screen('%s: Downloading video info webpage' % video_id)
1119
1120 def report_information_extraction(self, video_id):
1121 """Report attempt to extract video information."""
1122 self.to_screen('%s: Extracting video information' % video_id)
1123
1124 def report_unavailable_format(self, video_id, format):
1125 """Report extracted video URL."""
1126 self.to_screen('%s: Format %s not available' % (video_id, format))
1127
1128 def report_rtmp_download(self):
1129 """Indicate the download will use the RTMP protocol."""
1130 self.to_screen('RTMP download detected')
1131
1132 def _signature_cache_id(self, example_sig):
1133 """ Return a string representation of a signature """
1134 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1135
1136 @classmethod
1137 def _extract_player_info(cls, player_url):
1138 for player_re in cls._PLAYER_INFO_RE:
1139 id_m = re.search(player_re, player_url)
1140 if id_m:
1141 break
1142 else:
1143 raise ExtractorError('Cannot identify player %r' % player_url)
1144 return id_m.group('ext'), id_m.group('id')
1145
1146 def _extract_signature_function(self, video_id, player_url, example_sig):
1147 player_type, player_id = self._extract_player_info(player_url)
1148
1149 # Read from filesystem cache
1150 func_id = '%s_%s_%s' % (
1151 player_type, player_id, self._signature_cache_id(example_sig))
1152 assert os.path.basename(func_id) == func_id
1153
1154 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1155 if cache_spec is not None:
1156 return lambda s: ''.join(s[i] for i in cache_spec)
1157
1158 download_note = (
1159 'Downloading player %s' % player_url
1160 if self._downloader.params.get('verbose') else
1161 'Downloading %s player %s' % (player_type, player_id)
1162 )
1163 if player_type == 'js':
1164 code = self._download_webpage(
1165 player_url, video_id,
1166 note=download_note,
1167 errnote='Download of %s failed' % player_url)
1168 res = self._parse_sig_js(code)
1169 elif player_type == 'swf':
1170 urlh = self._request_webpage(
1171 player_url, video_id,
1172 note=download_note,
1173 errnote='Download of %s failed' % player_url)
1174 code = urlh.read()
1175 res = self._parse_sig_swf(code)
1176 else:
1177 assert False, 'Invalid player type %r' % player_type
1178
1179 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1180 cache_res = res(test_string)
1181 cache_spec = [ord(c) for c in cache_res]
1182
1183 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1184 return res
1185
    def _print_sig_code(self, func, example_sig):
        """Print Python source code equivalent to the deciphering function
        *func*, reconstructed for signatures laid out like *example_sig*.

        The function is probed with a string of distinct characters so its
        behaviour collapses to an index permutation, which is then rendered
        as a minimal sequence of slice/index expressions.
        """
        def gen_sig_code(idxs):
            # Yield source fragments ('s[i]' or 's[a:b:c]') that, joined with
            # ' + ', reproduce the permutation described by idxs. Consecutive
            # indices differing by +/-1 are folded into a single slice.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                # end + step is the exclusive slice bound; a negative bound
                # would wrap around, so fall back to an open-ended ':'
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: keep extending while the stride matches,
                    # otherwise emit the finished slice and restart.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new +/-1 run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index, no run to fold it into
                    yield 's[%d]' % prev
            # Flush the final element / pending run (i holds the last index)
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1224
    def _parse_sig_js(self, jscode):
        """Locate the signature-deciphering function in the player JS code
        and return a Python callable wrapping it via JSInterpreter.

        The candidate regexes are ordered from most recent/most specific to
        obsolete; the first one that matches wins.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             # NOTE(review): this last pattern duplicates the previous one;
             # harmless (first match wins) but could be dropped upstream.
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # JSInterpreter functions take their arguments as a list
        return lambda s: initial_function([s])
1245
1246 def _parse_sig_swf(self, file_contents):
1247 swfi = SWFInterpreter(file_contents)
1248 TARGET_CLASSNAME = 'SignatureDecipher'
1249 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1250 initial_function = swfi.extract_function(searched_class, 'decipher')
1251 return lambda s: initial_function([s])
1252
1253 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1254 """Turn the encrypted s field into a working signature"""
1255
1256 if player_url is None:
1257 raise ExtractorError('Cannot decrypt signature without player_url')
1258
1259 if player_url.startswith('//'):
1260 player_url = 'https:' + player_url
1261 elif not re.match(r'https?://', player_url):
1262 player_url = compat_urlparse.urljoin(
1263 'https://www.youtube.com', player_url)
1264 try:
1265 player_id = (player_url, self._signature_cache_id(s))
1266 if player_id not in self._player_cache:
1267 func = self._extract_signature_function(
1268 video_id, player_url, s
1269 )
1270 self._player_cache[player_id] = func
1271 func = self._player_cache[player_id]
1272 if self._downloader.params.get('youtube_print_sig_code'):
1273 self._print_sig_code(func, s)
1274 return func(s)
1275 except Exception as e:
1276 tb = traceback.format_exc()
1277 raise ExtractorError(
1278 'Signature extraction failed: ' + tb, cause=e)
1279
1280 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1281 try:
1282 subs_doc = self._download_xml(
1283 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1284 video_id, note=False)
1285 except ExtractorError as err:
1286 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1287 return {}
1288
1289 sub_lang_list = {}
1290 for track in subs_doc.findall('track'):
1291 lang = track.attrib['lang_code']
1292 if lang in sub_lang_list:
1293 continue
1294 sub_formats = []
1295 for ext in self._SUBTITLE_FORMATS:
1296 params = compat_urllib_parse_urlencode({
1297 'lang': lang,
1298 'v': video_id,
1299 'fmt': ext,
1300 'name': track.attrib['name'].encode('utf-8'),
1301 })
1302 sub_formats.append({
1303 'url': 'https://www.youtube.com/api/timedtext?' + params,
1304 'ext': ext,
1305 })
1306 sub_lang_list[lang] = sub_formats
1307 if has_live_chat_replay:
1308 sub_lang_list['live_chat'] = [
1309 {
1310 'video_id': video_id,
1311 'ext': 'json',
1312 'protocol': 'youtube_live_chat_replay',
1313 },
1314 ]
1315 if not sub_lang_list:
1316 self._downloader.report_warning('video doesn\'t have subtitles')
1317 return {}
1318 return sub_lang_list
1319
1320 def _get_ytplayer_config(self, video_id, webpage):
1321 patterns = (
1322 # User data may contain arbitrary character sequences that may affect
1323 # JSON extraction with regex, e.g. when '};' is contained the second
1324 # regex won't capture the whole JSON. Yet working around by trying more
1325 # concrete regex first keeping in mind proper quoted string handling
1326 # to be implemented in future that will replace this workaround (see
1327 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1328 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1329 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1330 r';ytplayer\.config\s*=\s*({.+?});',
1331 )
1332 config = self._search_regex(
1333 patterns, webpage, 'ytplayer.config', default=None)
1334 if config:
1335 return self._parse_json(
1336 uppercase_escape(config), video_id, fatal=False)
1337
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns a dict mapping language codes to lists of caption format
        dicts, or {} (with a warning) when no automatic captions are found.
        Three historical data layouts are tried in order: the legacy 'ttsurl'
        track list, the player_response JSON (format as of 22.06.2017), and
        the obsolete caption_tracks/caption_translation_languages args.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The first <track> node is the original (spoken) language
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # Each <target> node is a language the original track can be
                # machine-translated into
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting sub_url's
                # query string with each (tlang, fmt) combination
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    # The first caption track's baseUrl serves as the template
                    # URL for every translated language
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1439
1440 def _mark_watched(self, video_id, video_info, player_response):
1441 playback_url = url_or_none(try_get(
1442 player_response,
1443 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1444 video_info, lambda x: x['videostats_playback_base_url'][0]))
1445 if not playback_url:
1446 return
1447 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1448 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1449
1450 # cpn generation algorithm is reverse engineered from base.js.
1451 # In fact it works even with dummy cpn.
1452 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1453 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1454
1455 qs.update({
1456 'ver': ['2'],
1457 'cpn': [cpn],
1458 })
1459 playback_url = compat_urlparse.urlunparse(
1460 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1461
1462 self._download_webpage(
1463 playback_url, video_id, 'Marking watched',
1464 'Unable to mark watched', fatal=False)
1465
1466 @staticmethod
1467 def _extract_urls(webpage):
1468 # Embedded YouTube player
1469 entries = [
1470 unescapeHTML(mobj.group('url'))
1471 for mobj in re.finditer(r'''(?x)
1472 (?:
1473 <iframe[^>]+?src=|
1474 data-video-url=|
1475 <embed[^>]+?src=|
1476 embedSWF\(?:\s*|
1477 <object[^>]+data=|
1478 new\s+SWFObject\(
1479 )
1480 (["\'])
1481 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1482 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1483 \1''', webpage)]
1484
1485 # lazyYT YouTube embed
1486 entries.extend(list(map(
1487 unescapeHTML,
1488 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1489
1490 # Wordpress "YouTube Video Importer" plugin
1491 matches = re.findall(r'''(?x)<div[^>]+
1492 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1493 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1494 entries.extend(m[-1] for m in matches)
1495
1496 return entries
1497
1498 @staticmethod
1499 def _extract_url(webpage):
1500 urls = YoutubeIE._extract_urls(webpage)
1501 return urls[0] if urls else None
1502
1503 @classmethod
1504 def extract_id(cls, url):
1505 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1506 if mobj is None:
1507 raise ExtractorError('Invalid URL: %s' % url)
1508 video_id = mobj.group(2)
1509 return video_id
1510
1511 def _extract_chapters_from_json(self, webpage, video_id, duration):
1512 if not webpage:
1513 return
1514 data = self._extract_yt_initial_data(video_id, webpage)
1515 if not data or not isinstance(data, dict):
1516 return
1517 chapters_list = try_get(
1518 data,
1519 lambda x: x['playerOverlays']
1520 ['playerOverlayRenderer']
1521 ['decoratedPlayerBarRenderer']
1522 ['decoratedPlayerBarRenderer']
1523 ['playerBar']
1524 ['chapteredPlayerBarRenderer']
1525 ['chapters'],
1526 list)
1527 if not chapters_list:
1528 return
1529
1530 def chapter_time(chapter):
1531 return float_or_none(
1532 try_get(
1533 chapter,
1534 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1535 int),
1536 scale=1000)
1537 chapters = []
1538 for next_num, chapter in enumerate(chapters_list, start=1):
1539 start_time = chapter_time(chapter)
1540 if start_time is None:
1541 continue
1542 end_time = (chapter_time(chapters_list[next_num])
1543 if next_num < len(chapters_list) else duration)
1544 if end_time is None:
1545 continue
1546 title = try_get(
1547 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1548 compat_str)
1549 chapters.append({
1550 'start_time': start_time,
1551 'end_time': end_time,
1552 'title': title,
1553 })
1554 return chapters
1555
1556 @staticmethod
1557 def _extract_chapters_from_description(description, duration):
1558 if not description:
1559 return None
1560 chapter_lines = re.findall(
1561 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1562 description)
1563 if not chapter_lines:
1564 return None
1565 chapters = []
1566 for next_num, (chapter_line, time_point) in enumerate(
1567 chapter_lines, start=1):
1568 start_time = parse_duration(time_point)
1569 if start_time is None:
1570 continue
1571 if start_time > duration:
1572 break
1573 end_time = (duration if next_num == len(chapter_lines)
1574 else parse_duration(chapter_lines[next_num][1]))
1575 if end_time is None:
1576 continue
1577 if end_time > duration:
1578 end_time = duration
1579 if start_time > end_time:
1580 break
1581 chapter_title = re.sub(
1582 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1583 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1584 chapters.append({
1585 'start_time': start_time,
1586 'end_time': end_time,
1587 'title': chapter_title,
1588 })
1589 return chapters
1590
1591 def _extract_chapters(self, webpage, description, video_id, duration):
1592 return (self._extract_chapters_from_json(webpage, video_id, duration)
1593 or self._extract_chapters_from_description(description, duration))
1594
1595 def _real_extract(self, url):
1596 url, smuggled_data = unsmuggle_url(url, {})
1597
1598 proto = (
1599 'http' if self._downloader.params.get('prefer_insecure', False)
1600 else 'https')
1601
1602 start_time = None
1603 end_time = None
1604 parsed_url = compat_urllib_parse_urlparse(url)
1605 for component in [parsed_url.fragment, parsed_url.query]:
1606 query = compat_parse_qs(component)
1607 if start_time is None and 't' in query:
1608 start_time = parse_duration(query['t'][0])
1609 if start_time is None and 'start' in query:
1610 start_time = parse_duration(query['start'][0])
1611 if end_time is None and 'end' in query:
1612 end_time = parse_duration(query['end'][0])
1613
1614 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1615 mobj = re.search(self._NEXT_URL_RE, url)
1616 if mobj:
1617 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1618 video_id = self.extract_id(url)
1619
1620 # Get video webpage
1621 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1622 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1623
1624 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1625 video_id = qs.get('v', [None])[0] or video_id
1626
1627 # Attempt to extract SWF player URL
1628 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1629 if mobj is not None:
1630 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1631 else:
1632 player_url = None
1633
1634 dash_mpds = []
1635
1636 def add_dash_mpd(video_info):
1637 dash_mpd = video_info.get('dashmpd')
1638 if dash_mpd and dash_mpd[0] not in dash_mpds:
1639 dash_mpds.append(dash_mpd[0])
1640
1641 def add_dash_mpd_pr(pl_response):
1642 dash_mpd = url_or_none(try_get(
1643 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1644 compat_str))
1645 if dash_mpd and dash_mpd not in dash_mpds:
1646 dash_mpds.append(dash_mpd)
1647
1648 is_live = None
1649 view_count = None
1650
1651 def extract_view_count(v_info):
1652 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1653
1654 def extract_player_response(player_response, video_id):
1655 pl_response = str_or_none(player_response)
1656 if not pl_response:
1657 return
1658 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1659 if isinstance(pl_response, dict):
1660 add_dash_mpd_pr(pl_response)
1661 return pl_response
1662
1663 def extract_embedded_config(embed_webpage, video_id):
1664 embedded_config = self._search_regex(
1665 r'setConfig\(({.*})\);',
1666 embed_webpage, 'ytInitialData', default=None)
1667 if embedded_config:
1668 return embedded_config
1669
        player_response = {}

        # Get video info: either via the embed page (age-gated videos) or
        # straight from the watch page's ytplayer.config.
        video_info = {}
        embed_webpage = None
        if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
                or re.search(r'player-age-gate-content">', video_webpage) is not None):
            cookie_keys = self._get_cookies('https://www.youtube.com').keys()
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            url = proto + '://www.youtube.com/embed/%s' % video_id
            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
            ext = extract_embedded_config(embed_webpage, video_id)
            # NOTE(review): ext may be None when no setConfig() call was found,
            # in which case the re.search below raises TypeError — worth guarding.
            # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
            playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
            if not playable_in_embed:
                self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
                playable_in_embed = ''
            else:
                playable_in_embed = playable_in_embed.group('playableinEmbed')
            # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
            # if re.search(r'player-unavailable">', embed_webpage) is not None:
            if playable_in_embed == 'false':
                '''
                # TODO apply this patch when Support for Python 2.6(!) and above drops
                if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
                        or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
                '''
                # Login cookies present -> extraction from the watch page works.
                if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
                        or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
                    age_gate = False
                    # Try looking directly into the video webpage
                    ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
                    if ytplayer_config:
                        args = ytplayer_config.get("args")
                        if args is not None:
                            if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
                                # Convert to the same format returned by compat_parse_qs
                                video_info = dict((k, [v]) for k, v in args.items())
                                add_dash_mpd(video_info)
                            # Rental video is not rented but preview is available (e.g.
                            # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
                            # https://github.com/ytdl-org/youtube-dl/issues/10532)
                            if not video_info and args.get('ypc_vid'):
                                return self.url_result(
                                    args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
                            if args.get('livestream') == '1' or args.get('live_playback') == 1:
                                is_live = True
                            if not player_response:
                                player_response = extract_player_response(args.get('player_response'), video_id)
                        elif not player_response:
                            player_response = ytplayer_config
                        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                            add_dash_mpd_pr(player_response)
                else:
                    raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
            else:
                # Playable in embed: refetch metadata via get_video_info with
                # the signature timestamp (sts) scraped from the embed page.
                data = compat_urllib_parse_urlencode({
                    'video_id': video_id,
                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                    'sts': self._search_regex(
                        r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
                })
                video_info_url = proto + '://www.youtube.com/get_video_info?' + data
                try:
                    video_info_webpage = self._download_webpage(
                        video_info_url, video_id,
                        note='Refetching age-gated info webpage',
                        errnote='unable to download video info webpage')
                except ExtractorError:
                    video_info_webpage = None
                if video_info_webpage:
                    video_info = compat_parse_qs(video_info_webpage)
                    pl_response = video_info.get('player_response', [None])[0]
                    player_response = extract_player_response(pl_response, video_id)
                    add_dash_mpd(video_info)
                    view_count = extract_view_count(video_info)
        else:
            age_gate = False
            # Try looking directly into the video webpage
            ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
            if ytplayer_config:
                args = ytplayer_config.get('args', {})
                if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
                    # Convert to the same format returned by compat_parse_qs
                    video_info = dict((k, [v]) for k, v in args.items())
                    add_dash_mpd(video_info)
                # Rental video is not rented but preview is available (e.g.
                # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
                # https://github.com/ytdl-org/youtube-dl/issues/10532)
                if not video_info and args.get('ypc_vid'):
                    return self.url_result(
                        args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
                if args.get('livestream') == '1' or args.get('live_playback') == 1:
                    is_live = True
                if not player_response:
                    player_response = extract_player_response(args.get('player_response'), video_id)
            if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                add_dash_mpd_pr(player_response)

        # Last resort: the inline ytInitialPlayerResponse JSON on the watch page.
        if not video_info and not player_response:
            player_response = extract_player_response(
                self._search_regex(
                    r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
                    'initial player response', default='{}'),
                video_id)
1777
1778 def extract_unavailable_message():
1779 messages = []
1780 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1781 msg = self._html_search_regex(
1782 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1783 video_webpage, 'unavailable %s' % kind, default=None)
1784 if msg:
1785 messages.append(msg)
1786 if messages:
1787 return '\n'.join(messages)
1788
        if not video_info and not player_response:
            unavailable_message = extract_unavailable_message()
            if not unavailable_message:
                unavailable_message = 'Unable to extract video data'
            raise ExtractorError(
                'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)

        if not isinstance(video_info, dict):
            video_info = {}

        video_details = try_get(
            player_response, lambda x: x['videoDetails'], dict) or {}

        microformat = try_get(
            player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}

        video_title = video_info.get('title', [None])[0] or video_details.get('title')
        if not video_title:
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'

        # Description: prefer the HTML element (keeps link markup for the
        # /redirect unwrapping below), fall back to player response / meta.
        description_original = video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:

            def replace_url(m):
                # Unwrap youtube.com/redirect?q=... links to their real target.
                redir_url = compat_urlparse.urljoin(url, m.group(1))
                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
                    qs = compat_parse_qs(parsed_redir_url.query)
                    q = qs.get('q')
                    if q and q[0]:
                        return q[0]
                return redir_url

            description_original = video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]*"\s+)*?
                    (?:title|href)="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]*"\s+)*?
                    class="[^"]*"[^>]*>
                [^<]+\.{3}\s*
                </a>
            ''', replace_url, video_description)
            video_description = clean_html(video_description)
        else:
            video_description = video_details.get('shortDescription')
            if video_description is None:
                video_description = self._html_search_meta('description', video_webpage)

        # Multifeed (multi-camera) videos expand into one playlist entry per
        # feed unless --no-playlist or smuggled force_singlefeed applies.
        if not smuggled_data.get('force_singlefeed', False):
            if not self._downloader.params.get('noplaylist'):
                multifeed_metadata_list = try_get(
                    player_response,
                    lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                    compat_str) or try_get(
                    video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
                if multifeed_metadata_list:
                    entries = []
                    feed_ids = []
                    for feed in multifeed_metadata_list.split(','):
                        # Unquote should take place before split on comma (,) since textual
                        # fields may contain comma as well (see
                        # https://github.com/ytdl-org/youtube-dl/issues/8536)
                        feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))

                        def feed_entry(name):
                            return try_get(feed_data, lambda x: x[name][0], compat_str)

                        feed_id = feed_entry('id')
                        if not feed_id:
                            continue
                        feed_title = feed_entry('title')
                        title = video_title
                        if feed_title:
                            title += ' (%s)' % feed_title
                        entries.append({
                            '_type': 'url_transparent',
                            'ie_key': 'Youtube',
                            'url': smuggle_url(
                                '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                                {'force_singlefeed': True}),
                            'title': title,
                        })
                        feed_ids.append(feed_id)
                    self.to_screen(
                        'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                        % (', '.join(feed_ids), video_id))
                    return self.playlist_result(entries, video_id, video_title, video_description)
            else:
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        if view_count is None:
            view_count = extract_view_count(video_info)
        if view_count is None and video_details:
            view_count = int_or_none(video_details.get('viewCount'))
        if view_count is None and microformat:
            view_count = int_or_none(microformat.get('viewCount'))

        if is_live is None:
            is_live = bool_or_none(video_details.get('isLive'))

        # Live chat replay availability: probe the initial-data JSON path and
        # treat any missing key/index as "no replay".
        has_live_chat_replay = False
        if not is_live:
            yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
            try:
                yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
                has_live_chat_replay = True
            except (KeyError, IndexError, TypeError):
                pass

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1902
1903 def _extract_filesize(media_url):
1904 return int_or_none(self._search_regex(
1905 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1906
        # Merge muxed and adaptive format descriptors from the player response.
        streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
        streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
                'format_id': '_rtmp',
                'protocol': 'rtmp',
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
        elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
            formats = []
            # formats_spec maps itag -> metadata gathered from fmt_list and
            # the player-response descriptors; merged into each format below.
            formats_spec = {}
            fmt_list = video_info.get('fmt_list', [''])[0]
            if fmt_list:
                for fmt in fmt_list.split(','):
                    spec = fmt.split('/')
                    if len(spec) > 1:
                        width_height = spec[1].split('x')
                        if len(width_height) == 2:
                            formats_spec[spec[0]] = {
                                'resolution': spec[1],
                                'width': int_or_none(width_height[0]),
                                'height': int_or_none(width_height[1]),
                            }
            for fmt in streaming_formats:
                itag = str_or_none(fmt.get('itag'))
                if not itag:
                    continue
                quality = fmt.get('quality')
                quality_label = fmt.get('qualityLabel') or quality
                formats_spec[itag] = {
                    'asr': int_or_none(fmt.get('audioSampleRate')),
                    'filesize': int_or_none(fmt.get('contentLength')),
                    'format_note': quality_label,
                    'fps': int_or_none(fmt.get('fps')),
                    'height': int_or_none(fmt.get('height')),
                    # bitrate for itag 43 is always 2147483647
                    'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
                    'width': int_or_none(fmt.get('width')),
                }
1952
            # Build one format dict per streaming descriptor, decrypting the
            # signature cipher when the URL is not given in the clear.
            for fmt in streaming_formats:
                if fmt.get('drmFamilies') or fmt.get('drm_families'):
                    continue
                url = url_or_none(fmt.get('url'))

                if not url:
                    cipher = fmt.get('cipher') or fmt.get('signatureCipher')
                    if not cipher:
                        continue
                    url_data = compat_parse_qs(cipher)
                    url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
                    if not url:
                        continue
                else:
                    cipher = None
                    url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)

                stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
                # Unsupported FORMAT_STREAM_TYPE_OTF
                if stream_type == 3:
                    continue

                format_id = fmt.get('itag') or url_data['itag'][0]
                if not format_id:
                    continue
                format_id = compat_str(format_id)

                if cipher:
                    # Need the JS player to decrypt the scrambled signature.
                    if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
                        ASSETS_RE = (
                            r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
                            r'"jsUrl"\s*:\s*("[^"]+")',
                            r'"assets":.+?"js":\s*("[^"]+")')
                        jsplayer_url_json = self._search_regex(
                            ASSETS_RE,
                            embed_webpage if age_gate else video_webpage,
                            'JS player URL (1)', default=None)
                        if not jsplayer_url_json and not age_gate:
                            # We need the embed website after all
                            if embed_webpage is None:
                                embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                                embed_webpage = self._download_webpage(
                                    embed_url, video_id, 'Downloading embed webpage')
                            jsplayer_url_json = self._search_regex(
                                ASSETS_RE, embed_webpage, 'JS player URL')

                        player_url = json.loads(jsplayer_url_json)
                        if player_url is None:
                            player_url_json = self._search_regex(
                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                                video_webpage, 'age gate player URL')
                            player_url = json.loads(player_url_json)

                    if 'sig' in url_data:
                        # Signature already in the clear.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        encrypted_sig = url_data['s'][0]

                        if self._downloader.params.get('verbose'):
                            if player_url is None:
                                player_desc = 'unknown'
                            else:
                                player_type, player_version = self._extract_player_info(player_url)
                                player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
                            parts_sizes = self._signature_cache_id(encrypted_sig)
                            self.to_screen('{%s} signature length %s, %s' %
                                           (format_id, parts_sizes, player_desc))

                        signature = self._decrypt_signature(
                            encrypted_sig, video_id, player_url, age_gate)
                        # 'sp' names the query parameter the signature goes in.
                        sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
                        url += '&%s=%s' % (sp, signature)
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'

                dct = {
                    'format_id': format_id,
                    'url': url,
                    'player_url': player_url,
                }
                if format_id in self._formats:
                    dct.update(self._formats[format_id])
                if format_id in formats_spec:
                    dct.update(formats_spec[format_id])

                # Some itags are not included in DASH manifest thus corresponding formats will
                # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
                # Trying to extract metadata from url_encoded_fmt_stream_map entry.
                mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
                width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

                if width is None:
                    width = int_or_none(fmt.get('width'))
                if height is None:
                    height = int_or_none(fmt.get('height'))

                filesize = int_or_none(url_data.get(
                    'clen', [None])[0]) or _extract_filesize(url)

                quality = url_data.get('quality', [None])[0] or fmt.get('quality')
                quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')

                tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
                       or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
                fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))

                more_fields = {
                    'filesize': filesize,
                    'tbr': tbr,
                    'width': width,
                    'height': height,
                    'fps': fps,
                    'format_note': quality_label or quality,
                }
                # Only copy truthy values so merged metadata is not clobbered.
                for key, value in more_fields.items():
                    if value:
                        dct[key] = value
                type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
                if type_:
                    type_split = type_.split(';')
                    kind_ext = type_split[0].split('/')
                    if len(kind_ext) == 2:
                        kind, _ = kind_ext
                        dct['ext'] = mimetype2ext(type_split[0])
                        if kind in ('audio', 'video'):
                            codecs = None
                            for mobj in re.finditer(
                                    r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
                                if mobj.group('key') == 'codecs':
                                    codecs = mobj.group('val')
                                    break
                            if codecs:
                                dct.update(parse_codecs(codecs))
                if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
                    dct['downloader_options'] = {
                        # Youtube throttles chunks >~10M
                        'http_chunk_size': 10485760,
                    }
                formats.append(dct)
        else:
            # No progressive/adaptive data: try the HLS manifest (live
            # streams), otherwise surface the best error message we can find.
            manifest_url = (
                url_or_none(try_get(
                    player_response,
                    lambda x: x['streamingData']['hlsManifestUrl'],
                    compat_str))
                or url_or_none(try_get(
                    video_info, lambda x: x['hlsvp'][0], compat_str)))
            if manifest_url:
                formats = []
                m3u8_formats = self._extract_m3u8_formats(
                    manifest_url, video_id, 'mp4', fatal=False)
                for a_format in m3u8_formats:
                    itag = self._search_regex(
                        r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
                    if itag:
                        a_format['format_id'] = itag
                        if itag in self._formats:
                            dct = self._formats[itag].copy()
                            dct.update(a_format)
                            a_format = dct
                    a_format['player_url'] = player_url
                    # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
                    a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
                    if self._downloader.params.get('youtube_include_hls_manifest', True):
                        formats.append(a_format)
            else:
                error_message = extract_unavailable_message()
                if not error_message:
                    reason_list = try_get(
                        player_response,
                        lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
                        list) or []
                    for reason in reason_list:
                        if not isinstance(reason, dict):
                            continue
                        reason_text = try_get(reason, lambda x: x['text'], compat_str)
                        if reason_text:
                            if not error_message:
                                error_message = ''
                            error_message += reason_text
                    if error_message:
                        error_message = clean_html(error_message)
                if not error_message:
                    error_message = clean_html(try_get(
                        player_response, lambda x: x['playabilityStatus']['reason'],
                        compat_str))
                if not error_message:
                    error_message = clean_html(
                        try_get(video_info, lambda x: x['reason'][0], compat_str))
                if error_message:
                    raise ExtractorError(error_message, expected=True)
                raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2145
        # uploader
        video_uploader = try_get(
            video_info, lambda x: x['author'][0],
            compat_str) or str_or_none(video_details.get('author'))
        if video_uploader:
            video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
        else:
            self._downloader.report_warning('unable to extract uploader name')

        # uploader_id
        video_uploader_id = None
        video_uploader_url = None
        mobj = re.search(
            r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
            video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group('uploader_id')
            video_uploader_url = mobj.group('uploader_url')
        else:
            # Fall back to the microformat owner profile URL.
            owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
            if owner_profile_url:
                video_uploader_id = self._search_regex(
                    r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
                    default=None)
                video_uploader_url = owner_profile_url

        channel_id = (
            str_or_none(video_details.get('channelId'))
            or self._html_search_meta(
                'channelId', video_webpage, 'channel id', default=None)
            or self._search_regex(
                r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
                video_webpage, 'channel id', default=None, group='id'))
        channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None

        # Thumbnails: prefer the player-response list; fall back to scraping.
        thumbnails = []
        thumbnails_list = try_get(
            video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })

        if not thumbnails:
            video_thumbnail = None
            # We try first to get a high quality image:
            m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                                video_webpage, re.DOTALL)
            if m_thumb is not None:
                video_thumbnail = m_thumb.group(1)
            thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
            if thumbnail_url:
                video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
            if video_thumbnail:
                thumbnails.append({'url': video_thumbnail})

        # upload date
        upload_date = self._html_search_meta(
            'datePublished', video_webpage, 'upload date', default=None)
        if not upload_date:
            upload_date = self._search_regex(
                [r'(?s)id="eow-date.*?>(.*?)</span>',
                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
                video_webpage, 'upload date', default=None)
        if not upload_date:
            upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
        upload_date = unified_strdate(upload_date)

        video_license = self._html_search_regex(
            r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
            video_webpage, 'license', default=None)

        # "Music" metadata box from the classic watch page markup.
        m_music = re.search(
            r'''(?x)
                <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
                <ul[^>]*>\s*
                <li>(?P<title>.+?)
                by (?P<creator>.+?)
                (?:
                    \(.+?\)|
                    <a[^>]*
                        (?:
                            \bhref=["\']/red[^>]*>|       # drop possible
                            >\s*Listen ad-free with YouTube Red # YouTube Red ad
                        )
                    .*?
                )?</li
            ''',
            video_webpage)
        if m_music:
            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
            video_creator = clean_html(m_music.group('creator'))
        else:
            video_alt_title = video_creator = None
2247
2248 def extract_meta(field):
2249 return self._html_search_regex(
2250 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2251 video_webpage, field, default=None)
2252
2253 track = extract_meta('Song')
2254 artist = extract_meta('Artist')
2255 album = extract_meta('Album')
2256
2257 # Youtube Music Auto-generated description
2258 release_date = release_year = None
2259 if video_description:
2260 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2261 if mobj:
2262 if not track:
2263 track = mobj.group('track').strip()
2264 if not artist:
2265 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2266 if not album:
2267 album = mobj.group('album'.strip())
2268 release_year = mobj.group('release_year')
2269 release_date = mobj.group('release_date')
2270 if release_date:
2271 release_date = release_date.replace('-', '')
2272 if not release_year:
2273 release_year = int(release_date[:4])
2274 if release_year:
2275 release_year = int(release_year)
2276
        # Newer pages expose Song/Artist/Album/License in ytInitialData
        # metadata rows; these take precedence over the scraped values.
        yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
        contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        for content in contents:
            rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
            # A divider line means several songs are credited; per-song fields
            # would then be ambiguous, so only License is trusted.
            multiple_songs = False
            for row in rows:
                if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                    multiple_songs = True
                    break
            for row in rows:
                mrr = row.get('metadataRowRenderer') or {}
                mrr_title = try_get(
                    mrr, lambda x: x['title']['simpleText'], compat_str)
                mrr_contents = try_get(
                    mrr, lambda x: x['contents'][0], dict) or {}
                mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
                if not (mrr_title and mrr_contents_text):
                    continue
                if mrr_title == 'License':
                    video_license = mrr_contents_text
                elif not multiple_songs:
                    if mrr_title == 'Album':
                        album = mrr_contents_text
                    elif mrr_title == 'Artist':
                        artist = mrr_contents_text
                    elif mrr_title == 'Song':
                        track = mrr_contents_text

        # Series/season/episode from the watch page headline, if present.
        m_episode = re.search(
            r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
            video_webpage)
        if m_episode:
            series = unescapeHTML(m_episode.group('series'))
            season_number = int(m_episode.group('season'))
            episode_number = int(m_episode.group('episode'))
        else:
            series = season_number = episode_number = None

        m_cat_container = self._search_regex(
            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
            video_webpage, 'categories', default=None)
        category = None
        if m_cat_container:
            category = self._html_search_regex(
                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                default=None)
        if not category:
            category = try_get(
                microformat, lambda x: x['category'], compat_str)
        video_categories = None if category is None else [category]

        video_tags = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
        if not video_tags:
            video_tags = try_get(video_details, lambda x: x['keywords'], list)
2333
2334 def _extract_count(count_name):
2335 return str_to_int(self._search_regex(
2336 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2337 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2338 video_webpage, count_name, default=None))
2339
        like_count = _extract_count('like')
        dislike_count = _extract_count('dislike')

        if view_count is None:
            view_count = str_to_int(self._search_regex(
                r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
                'view count', default=None))

        average_rating = (
            float_or_none(video_details.get('averageRating'))
            or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))

        # subtitles
        video_subtitles = self.extract_subtitles(
            video_id, video_webpage, has_live_chat_replay)
        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

        video_duration = try_get(
            video_info, lambda x: int_or_none(x['length_seconds'][0]))
        if not video_duration:
            video_duration = int_or_none(video_details.get('lengthSeconds'))
        if not video_duration:
            video_duration = parse_duration(self._html_search_meta(
                'duration', video_webpage, 'video duration'))

        # Get Subscriber Count of channel
        subscriber_count = parse_count(self._search_regex(
            r'"text":"([\d\.]+\w?) subscribers"',
            video_webpage,
            'subscriber count',
            default=None
        ))

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            # Annotations are fetched via a POST that requires the page's
            # XSRF token; skip silently when either piece is missing.
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
            invideo_url = try_get(
                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
            if xsrf_token and invideo_url:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    video_webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
                video_annotations = self._download_webpage(
                    self._proto_relative_url(invideo_url),
                    video_id, note='Downloading annotations',
                    errnote='Unable to download video annotations', fatal=False,
                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))

        chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)

        # Look for the DASH manifest
        if self._downloader.params.get('youtube_include_dash_manifest', True):
            dash_mpd_fatal = True
            for mpd_url in dash_mpds:
                dash_formats = {}
                try:
                    def decrypt_sig(mobj):
                        # Replace /s/<scrambled> path segments with the
                        # decrypted /signature/<sig> equivalent.
                        s = mobj.group(1)
                        dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                        return '/signature/%s' % dec_s

                    mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)

                    for df in self._extract_mpd_formats(
                            mpd_url, video_id, fatal=dash_mpd_fatal,
                            formats_dict=self._formats):
                        if not df.get('filesize'):
                            df['filesize'] = _extract_filesize(df['url'])
                        # Do not overwrite DASH format found in some previous DASH manifest
                        if df['format_id'] not in dash_formats:
                            dash_formats[df['format_id']] = df
                    # Additional DASH manifests may end up in HTTP Error 403 therefore
                    # allow them to fail without bug report message if we already have
                    # some DASH manifest succeeded. This is temporary workaround to reduce
                    # burst of bug reports until we figure out the reason and whether it
                    # can be fixed at all.
                    dash_mpd_fatal = False
                except (ExtractorError, KeyError) as e:
                    self.report_warning(
                        'Skipping DASH manifest: %r' % e, video_id)
                if dash_formats:
                    # Remove the formats we found through non-DASH, they
                    # contain less info and it can be wrong, because we use
                    # fixed values (for example the resolution). See
                    # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
                    # example.
                    formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
                    formats.extend(dash_formats.values())
2432
        # Check for malformed aspect ratio
        stretched_m = re.search(
            r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
            video_webpage)
        if stretched_m:
            w = float(stretched_m.group('w'))
            h = float(stretched_m.group('h'))
            # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
            # We will only process correct ratios.
            if w > 0 and h > 0:
                ratio = w / h
                for f in formats:
                    if f.get('vcodec') != 'none':
                        f['stretched_ratio'] = ratio

        # Nothing extracted: explain why (geo restriction, bad params, DRM).
        if not formats:
            if 'reason' in video_info:
                if 'The uploader has not made this video available in your country.' in video_info['reason']:
                    regions_allowed = self._html_search_meta(
                        'regionsAllowed', video_webpage, default=None)
                    countries = regions_allowed.split(',') if regions_allowed else None
                    self.raise_geo_restricted(
                        msg=video_info['reason'][0], countries=countries)
                reason = video_info['reason'][0]
                if 'Invalid parameters' in reason:
                    unavailable_message = extract_unavailable_message()
                    if unavailable_message:
                        reason = unavailable_message
                raise ExtractorError(
                    'YouTube said: %s' % reason,
                    expected=True, video_id=video_id)
            if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
                raise ExtractorError('This video is DRM protected.', expected=True)

        self._sort_formats(formats)

        self.mark_watched(video_id, video_info, player_response)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'uploader_url': video_uploader_url,
            'channel_id': channel_id,
            'channel_url': channel_url,
            'upload_date': upload_date,
            'license': video_license,
            'creator': video_creator or artist,
            'title': video_title,
            'alt_title': video_alt_title or track,
            'thumbnails': thumbnails,
            'description': video_description,
            'categories': video_categories,
            'tags': video_tags,
            'subtitles': video_subtitles,
            'automatic_captions': automatic_captions,
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'chapters': chapters,
            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'average_rating': average_rating,
            'formats': formats,
            'is_live': is_live,
            'start_time': start_time,
            'end_time': end_time,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'track': track,
            'artist': artist,
            'album': album,
            'release_date': release_date,
            'release_year': release_year,
            'subscriber_count': subscriber_count,
        }
2512
2513
2514 class YoutubeTabIE(YoutubeBaseInfoExtractor):
2515 IE_DESC = 'YouTube.com tab'
2516 _VALID_URL = r'''(?x)
2517 https?://
2518 (?:\w+\.)?
2519 (?:
2520 youtube(?:kids)?\.com|
2521 invidio\.us
2522 )/
2523 (?:
2524 (?:channel|c|user)/|
2525 (?P<not_channel>
2526 feed/|
2527 (?:playlist|watch)\?.*?\blist=
2528 )|
2529 (?!(%s)([/#?]|$)) # Direct URLs
2530 )
2531 (?P<id>[^/?\#&]+)
2532 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
2533 IE_NAME = 'youtube:tab'
2534
2535 _TESTS = [{
2536 # playlists, multipage
2537 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2538 'playlist_mincount': 94,
2539 'info_dict': {
2540 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2541 'title': 'Игорь Клейнер - Playlists',
2542 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2543 },
2544 }, {
2545 # playlists, multipage, different order
2546 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2547 'playlist_mincount': 94,
2548 'info_dict': {
2549 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2550 'title': 'Игорь Клейнер - Playlists',
2551 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2552 },
2553 }, {
2554 # playlists, singlepage
2555 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2556 'playlist_mincount': 4,
2557 'info_dict': {
2558 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2559 'title': 'ThirstForScience - Playlists',
2560 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2561 }
2562 }, {
2563 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2564 'only_matching': True,
2565 }, {
2566 # basic, single video playlist
2567 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2568 'info_dict': {
2569 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2570 'uploader': 'Sergey M.',
2571 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2572 'title': 'youtube-dl public playlist',
2573 },
2574 'playlist_count': 1,
2575 }, {
2576 # empty playlist
2577 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2578 'info_dict': {
2579 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2580 'uploader': 'Sergey M.',
2581 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2582 'title': 'youtube-dl empty playlist',
2583 },
2584 'playlist_count': 0,
2585 }, {
2586 # Home tab
2587 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2588 'info_dict': {
2589 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2590 'title': 'lex will - Home',
2591 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2592 },
2593 'playlist_mincount': 2,
2594 }, {
2595 # Videos tab
2596 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2597 'info_dict': {
2598 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2599 'title': 'lex will - Videos',
2600 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2601 },
2602 'playlist_mincount': 975,
2603 }, {
2604 # Videos tab, sorted by popular
2605 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2606 'info_dict': {
2607 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2608 'title': 'lex will - Videos',
2609 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2610 },
2611 'playlist_mincount': 199,
2612 }, {
2613 # Playlists tab
2614 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2615 'info_dict': {
2616 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2617 'title': 'lex will - Playlists',
2618 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2619 },
2620 'playlist_mincount': 17,
2621 }, {
2622 # Community tab
2623 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2624 'info_dict': {
2625 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2626 'title': 'lex will - Community',
2627 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2628 },
2629 'playlist_mincount': 18,
2630 }, {
2631 # Channels tab
2632 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2633 'info_dict': {
2634 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2635 'title': 'lex will - Channels',
2636 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2637 },
2638 'playlist_mincount': 138,
2639 }, {
2640 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2641 'only_matching': True,
2642 }, {
2643 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2644 'only_matching': True,
2645 }, {
2646 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2647 'only_matching': True,
2648 }, {
2649 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2650 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2651 'info_dict': {
2652 'title': '29C3: Not my department',
2653 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2654 'uploader': 'Christiaan008',
2655 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2656 },
2657 'playlist_count': 96,
2658 }, {
2659 'note': 'Large playlist',
2660 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2661 'info_dict': {
2662 'title': 'Uploads from Cauchemar',
2663 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2664 'uploader': 'Cauchemar',
2665 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2666 },
2667 'playlist_mincount': 1123,
2668 }, {
2669 # even larger playlist, 8832 videos
2670 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2671 'only_matching': True,
2672 }, {
2673 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2674 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2675 'info_dict': {
2676 'title': 'Uploads from Interstellar Movie',
2677 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2678 'uploader': 'Interstellar Movie',
2679 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2680 },
2681 'playlist_mincount': 21,
2682 }, {
2683 # https://github.com/ytdl-org/youtube-dl/issues/21844
2684 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2685 'info_dict': {
2686 'title': 'Data Analysis with Dr Mike Pound',
2687 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2688 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2689 'uploader': 'Computerphile',
2690 },
2691 'playlist_mincount': 11,
2692 }, {
2693 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2694 'only_matching': True,
2695 }, {
2696 # Playlist URL that does not actually serve a playlist
2697 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2698 'info_dict': {
2699 'id': 'FqZTN594JQw',
2700 'ext': 'webm',
2701 'title': "Smiley's People 01 detective, Adventure Series, Action",
2702 'uploader': 'STREEM',
2703 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2705 'upload_date': '20150526',
2706 'license': 'Standard YouTube License',
2707 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2708 'categories': ['People & Blogs'],
2709 'tags': list,
2710 'view_count': int,
2711 'like_count': int,
2712 'dislike_count': int,
2713 },
2714 'params': {
2715 'skip_download': True,
2716 },
2717 'skip': 'This video is not available.',
2718 'add_ie': [YoutubeIE.ie_key()],
2719 }, {
2720 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2721 'only_matching': True,
2722 }, {
2723 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2724 'only_matching': True,
2725 }, {
2726 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2727 'info_dict': {
2728 'id': '9Auq9mYxFEE',
2729 'ext': 'mp4',
2730 'title': 'Watch Sky News live',
2731 'uploader': 'Sky News',
2732 'uploader_id': 'skynews',
2733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2734 'upload_date': '20191102',
2735 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2736 'categories': ['News & Politics'],
2737 'tags': list,
2738 'like_count': int,
2739 'dislike_count': int,
2740 },
2741 'params': {
2742 'skip_download': True,
2743 },
2744 }, {
2745 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2746 'info_dict': {
2747 'id': 'a48o2S1cPoo',
2748 'ext': 'mp4',
2749 'title': 'The Young Turks - Live Main Show',
2750 'uploader': 'The Young Turks',
2751 'uploader_id': 'TheYoungTurks',
2752 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2753 'upload_date': '20150715',
2754 'license': 'Standard YouTube License',
2755 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2756 'categories': ['News & Politics'],
2757 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2758 'like_count': int,
2759 'dislike_count': int,
2760 },
2761 'params': {
2762 'skip_download': True,
2763 },
2764 'only_matching': True,
2765 }, {
2766 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2767 'only_matching': True,
2768 }, {
2769 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2770 'only_matching': True,
2771 }, {
2772 'url': 'https://www.youtube.com/feed/trending',
2773 'only_matching': True,
2774 }, {
2775 # needs auth
2776 'url': 'https://www.youtube.com/feed/library',
2777 'only_matching': True,
2778 }, {
2779 # needs auth
2780 'url': 'https://www.youtube.com/feed/history',
2781 'only_matching': True,
2782 }, {
2783 # needs auth
2784 'url': 'https://www.youtube.com/feed/subscriptions',
2785 'only_matching': True,
2786 }, {
2787 # needs auth
2788 'url': 'https://www.youtube.com/feed/watch_later',
2789 'only_matching': True,
2790 }, {
2791 # no longer available?
2792 'url': 'https://www.youtube.com/feed/recommended',
2793 'only_matching': True,
2794 }
2795 # TODO
2796 # {
2797 # 'url': 'https://www.youtube.com/TheYoungTurks/live',
2798 # 'only_matching': True,
2799 # }
2800 ]
2801
2802 def _extract_channel_id(self, webpage):
2803 channel_id = self._html_search_meta(
2804 'channelId', webpage, 'channel id', default=None)
2805 if channel_id:
2806 return channel_id
2807 channel_url = self._html_search_meta(
2808 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2809 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2810 'twitter:app:url:googleplay'), webpage, 'channel url')
2811 return self._search_regex(
2812 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2813 channel_url, 'channel id')
2814
2815 @staticmethod
2816 def _extract_grid_item_renderer(item):
2817 for item_kind in ('Playlist', 'Video', 'Channel'):
2818 renderer = item.get('grid%sRenderer' % item_kind)
2819 if renderer:
2820 return renderer
2821
2822 def _extract_video(self, renderer):
2823 video_id = renderer.get('videoId')
2824 title = try_get(
2825 renderer,
2826 (lambda x: x['title']['runs'][0]['text'],
2827 lambda x: x['title']['simpleText']), compat_str)
2828 description = try_get(
2829 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2830 compat_str)
2831 duration = parse_duration(try_get(
2832 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2833 view_count_text = try_get(
2834 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2835 view_count = str_to_int(self._search_regex(
2836 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2837 'view count', default=None))
2838 uploader = try_get(
2839 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2840 return {
2841 '_type': 'url_transparent',
2842 'ie_key': YoutubeIE.ie_key(),
2843 'id': video_id,
2844 'url': video_id,
2845 'title': title,
2846 'description': description,
2847 'duration': duration,
2848 'view_count': view_count,
2849 'uploader': uploader,
2850 }
2851
2852 def _grid_entries(self, grid_renderer):
2853 for item in grid_renderer['items']:
2854 if not isinstance(item, dict):
2855 continue
2856 renderer = self._extract_grid_item_renderer(item)
2857 if not isinstance(renderer, dict):
2858 continue
2859 title = try_get(
2860 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2861 # playlist
2862 playlist_id = renderer.get('playlistId')
2863 if playlist_id:
2864 yield self.url_result(
2865 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2866 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2867 video_title=title)
2868 # video
2869 video_id = renderer.get('videoId')
2870 if video_id:
2871 yield self._extract_video(renderer)
2872 # channel
2873 channel_id = renderer.get('channelId')
2874 if channel_id:
2875 title = try_get(
2876 renderer, lambda x: x['title']['simpleText'], compat_str)
2877 yield self.url_result(
2878 'https://www.youtube.com/channel/%s' % channel_id,
2879 ie=YoutubeTabIE.ie_key(), video_title=title)
2880
2881 def _shelf_entries_from_content(self, shelf_renderer):
2882 content = shelf_renderer.get('content')
2883 if not isinstance(content, dict):
2884 return
2885 renderer = content.get('gridRenderer')
2886 if renderer:
2887 # TODO: add support for nested playlists so each shelf is processed
2888 # as separate playlist
2889 # TODO: this includes only first N items
2890 for entry in self._grid_entries(renderer):
2891 yield entry
2892 renderer = content.get('horizontalListRenderer')
2893 if renderer:
2894 # TODO
2895 pass
2896
2897 def _shelf_entries(self, shelf_renderer):
2898 ep = try_get(
2899 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2900 compat_str)
2901 shelf_url = urljoin('https://www.youtube.com', ep)
2902 if shelf_url:
2903 title = try_get(
2904 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2905 yield self.url_result(shelf_url, video_title=title)
2906 # Shelf may not contain shelf URL, fallback to extraction from content
2907 for entry in self._shelf_entries_from_content(shelf_renderer):
2908 yield entry
2909
2910 def _playlist_entries(self, video_list_renderer):
2911 for content in video_list_renderer['contents']:
2912 if not isinstance(content, dict):
2913 continue
2914 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2915 if not isinstance(renderer, dict):
2916 continue
2917 video_id = renderer.get('videoId')
2918 if not video_id:
2919 continue
2920 yield self._extract_video(renderer)
2921
2922 r""" # Not needed in the new implementation
2923 def _itemSection_entries(self, item_sect_renderer):
2924 for content in item_sect_renderer['contents']:
2925 if not isinstance(content, dict):
2926 continue
2927 renderer = content.get('videoRenderer', {})
2928 if not isinstance(renderer, dict):
2929 continue
2930 video_id = renderer.get('videoId')
2931 if not video_id:
2932 continue
2933 yield self._extract_video(renderer)
2934 """
2935
2936 def _rich_entries(self, rich_grid_renderer):
2937 renderer = try_get(
2938 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
2939 video_id = renderer.get('videoId')
2940 if not video_id:
2941 return
2942 yield self._extract_video(renderer)
2943
2944 def _video_entry(self, video_renderer):
2945 video_id = video_renderer.get('videoId')
2946 if video_id:
2947 return self._extract_video(video_renderer)
2948
2949 def _post_thread_entries(self, post_thread_renderer):
2950 post_renderer = try_get(
2951 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2952 if not post_renderer:
2953 return
2954 # video attachment
2955 video_renderer = try_get(
2956 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2957 video_id = None
2958 if video_renderer:
2959 entry = self._video_entry(video_renderer)
2960 if entry:
2961 yield entry
2962 # inline video links
2963 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2964 for run in runs:
2965 if not isinstance(run, dict):
2966 continue
2967 ep_url = try_get(
2968 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2969 if not ep_url:
2970 continue
2971 if not YoutubeIE.suitable(ep_url):
2972 continue
2973 ep_video_id = YoutubeIE._match_id(ep_url)
2974 if video_id == ep_video_id:
2975 continue
2976 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2977
2978 def _post_thread_continuation_entries(self, post_thread_continuation):
2979 contents = post_thread_continuation.get('contents')
2980 if not isinstance(contents, list):
2981 return
2982 for content in contents:
2983 renderer = content.get('backstagePostThreadRenderer')
2984 if not isinstance(renderer, dict):
2985 continue
2986 for entry in self._post_thread_entries(renderer):
2987 yield entry
2988
2989 @staticmethod
2990 def _extract_next_continuation_data(renderer):
2991 next_continuation = try_get(
2992 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2993 if not next_continuation:
2994 return
2995 continuation = next_continuation.get('continuation')
2996 if not continuation:
2997 return
2998 ctp = next_continuation.get('clickTrackingParams')
2999 return {
3000 'ctoken': continuation,
3001 'continuation': continuation,
3002 'itct': ctp,
3003 }
3004
3005 @classmethod
3006 def _extract_continuation(cls, renderer):
3007 next_continuation = cls._extract_next_continuation_data(renderer)
3008 if next_continuation:
3009 return next_continuation
3010 contents = renderer.get('contents')
3011 if not isinstance(contents, list):
3012 return
3013 for content in contents:
3014 if not isinstance(content, dict):
3015 continue
3016 continuation_ep = try_get(
3017 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3018 dict)
3019 if not continuation_ep:
3020 continue
3021 continuation = try_get(
3022 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3023 if not continuation:
3024 continue
3025 ctp = continuation_ep.get('clickTrackingParams')
3026 if not ctp:
3027 continue
3028 return {
3029 'ctoken': continuation,
3030 'continuation': continuation,
3031 'itct': ctp,
3032 }
3033
    def _entries(self, tab, identity_token):
        """Yield all entries of a tab's content, following continuations.

        `tab` is the selected tab's 'content' object from ytInitialData.
        `identity_token`, when set, is sent as x-youtube-identity-token so
        that authenticated feeds (history, subscriptions, ...) work.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # rich grid layout (e.g. channel Videos tab)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                # itemSectionRenderer layout: dispatch on the inner renderer kind
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        for entry in self._shelf_entries(renderer):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # single-element list used as a writable cell because the generator
        # closure must update it (Python 2 does not support nonlocal)
        continuation_list = [None]
        parent_renderer = (
            try_get(tab, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        # pagination: keep requesting browse_ajax until no continuation is left
        for page_num in itertools.count(1):
            if not continuation:
                break
            browse = self._download_json(
                'https://www.youtube.com/browse_ajax', None,
                'Downloading page %d' % page_num,
                headers=headers, query=continuation, fatal=False)
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # old-style continuation payload
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    # reset the cell and re-run the section dispatcher
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # new-style continuation payload
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): this continues the page loop WITHOUT updating
                    # 'continuation', so the same page would be requested again
                    # forever if the first item is never a dict — confirm intent
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3159
3160 @staticmethod
3161 def _extract_selected_tab(tabs):
3162 for tab in tabs:
3163 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3164 return tab['tabRenderer']
3165 else:
3166 raise ExtractorError('Unable to find selected tab')
3167
3168 @staticmethod
3169 def _extract_uploader(data):
3170 uploader = {}
3171 sidebar_renderer = try_get(
3172 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3173 if sidebar_renderer:
3174 for item in sidebar_renderer:
3175 if not isinstance(item, dict):
3176 continue
3177 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3178 if not isinstance(renderer, dict):
3179 continue
3180 owner = try_get(
3181 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3182 if owner:
3183 uploader['uploader'] = owner.get('text')
3184 uploader['uploader_id'] = try_get(
3185 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3186 uploader['uploader_url'] = urljoin(
3187 'https://www.youtube.com/',
3188 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3189 return uploader
3190
3191 def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3192 selected_tab = self._extract_selected_tab(tabs)
3193 renderer = try_get(
3194 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3195 playlist_id = title = description = None
3196 if renderer:
3197 channel_title = renderer.get('title') or item_id
3198 tab_title = selected_tab.get('title')
3199 title = channel_title or item_id
3200 if tab_title:
3201 title += ' - %s' % tab_title
3202 description = renderer.get('description')
3203 playlist_id = renderer.get('externalId')
3204 renderer = try_get(
3205 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3206 if renderer:
3207 title = renderer.get('title')
3208 description = None
3209 playlist_id = item_id
3210 if playlist_id is None:
3211 playlist_id = item_id
3212 if title is None:
3213 title = "Youtube " + playlist_id.title()
3214 playlist = self.playlist_result(
3215 self._entries(selected_tab['content'], identity_token),
3216 playlist_id=playlist_id, playlist_title=title,
3217 playlist_description=description)
3218 playlist.update(self._extract_uploader(data))
3219 return playlist
3220
3221 def _extract_from_playlist(self, item_id, data, playlist):
3222 title = playlist.get('title') or try_get(
3223 data, lambda x: x['titleText']['simpleText'], compat_str)
3224 playlist_id = playlist.get('playlistId') or item_id
3225 return self.playlist_result(
3226 self._playlist_entries(playlist), playlist_id=playlist_id,
3227 playlist_title=title)
3228
3229 def _extract_alerts(self, data):
3230 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3231 for renderer in alert_dict:
3232 alert = alert_dict[renderer]
3233 alert_type = alert.get('type')
3234 if not alert_type:
3235 continue
3236 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3237 if message:
3238 yield alert_type, message
3239 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3240 message = try_get(run, lambda x: x['text'], compat_str)
3241 if message:
3242 yield alert_type, message
3243
    def _real_extract(self, url):
        """Dispatch a tab/playlist/watch URL to the right extraction path.

        Normalizes the host to www.youtube.com, rewrites bare channel URLs to
        their /videos tab, honours --no-playlist for watch URLs carrying a
        list parameter, and finally extracts from tabs, inline playlist, or
        falls back to single-video extraction.
        """
        item_id = self._match_id(url)
        # force the canonical host so later requests/regexes behave uniformly
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/home" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # NOTE(review): is_home can be None here (the anchored regex above may
        # fail even though _VALID_URL matched), which would raise
        # AttributeError on .group() — confirm whether that case is reachable
        if is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(item_id, webpage)
        # surface YouTube's own warnings/errors (private video, removed, ...)
        for alert_type, alert_message in self._extract_alerts(data):
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3297
3298
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us|
                                youtu\.be
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only URLs that YoutubeTabIE does not already claim."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize any playlist-style URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Keep the original query parameters when present (e.g. extra list
        # options); a bare playlist ID gets a synthesized ?list= query.
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3397
3398
class YoutubeYtUserIE(InfoExtractor):
    # Handles the youtube-dl style "ytuser:NAME" shorthand.
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite ytuser:NAME to the canonical /user/ URL for YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3411
3412
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the liked-videos ("LL") playlist via YoutubeTabIE."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3430
3431
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` url_transparent results for ``query``.

        Pages through the InnerTube search API, following continuation
        tokens until ``n`` results have been produced or no further page
        is available.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page and continuation pages nest the section list
            # differently, so probe both shapes.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            isr_contents = []
            continuation_token = None
            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            for slr_content in slr_contents:
                if not isr_contents:
                    # `or []` guards the loop below: try_get returns None
                    # when the renderer is absent, which would otherwise
                    # raise TypeError on iteration.
                    isr_contents = try_get(
                        slr_content,
                        lambda x: x['itemSectionRenderer']['contents'],
                        list) or []
                    # Only keep a section that actually contains videos.
                    for content in isr_contents:
                        if content.get('videoRenderer') is not None:
                            break
                    else:
                        isr_contents = []

                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)
                if continuation_token is not None and isr_contents:
                    break

            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                # >= (not ==) so a future multi-yield change cannot
                # overshoot past the requested count.
                if total >= n:
                    return
            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3539
3540
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search flow as YoutubeSearchIE; only the key and params differ.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Pre-encoded InnerTube search filter; presumably the "sort by upload
    # date" option (matches IE_DESC) — TODO confirm against the web client.
    _SEARCH_PARAMS = 'CAI%3D'
3546
3547
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Reuse _VALID_URL instead of the search-key pattern the base builds.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Extract the query (and optional sp filter) from a /results URL."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees at least one of search_query/q is present.
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3573
3574
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Common base for the logged-in feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived per subclass, e.g. "youtube:history".
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific, so authentication happens up front.
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3595
3596
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the watch-later ("WL") playlist via YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3609
3610
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Extraction itself lives in YoutubeFeedsInfoExtractor; this class only
    # supplies the feed name and URL patterns.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage (which shows recommendations).
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3625
3626
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Extraction itself lives in YoutubeFeedsInfoExtractor; this class only
    # supplies the feed name and the ":ytsubs" shorthand pattern.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3638
3639
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Extraction itself lives in YoutubeFeedsInfoExtractor; this class only
    # supplies the feed name and the ":ythistory" shorthand pattern.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3648
3649
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose shell-unquoted "&v=..." part got cut off and
    raise a helpful error instead of a confusing extraction failure."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Matching this pattern is itself the error condition.
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
3697
3698
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID is shorter than the 11 characters a
    real YouTube ID has, and report the truncation explicitly."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
3714
3715
# YoutubeShowIE is disabled: YouTube /show/ URLs appear to be defunct and no
# live examples could be found. The implementation is kept below, inert
# inside a raw-string literal, in case the URLs ever resurface.
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''