# Source: jfr.im git mirror of yt-dlp.git — youtube_dlc/extractor/youtube.py
# Commit: "Update to ytdl-2021.01.03"
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 ExtractorError,
34 float_or_none,
35 get_element_by_id,
36 int_or_none,
37 mimetype2ext,
38 parse_codecs,
39 parse_count,
40 parse_duration,
41 remove_quotes,
42 remove_start,
43 smuggle_url,
44 str_or_none,
45 str_to_int,
46 try_get,
47 unescapeHTML,
48 unified_strdate,
49 unsmuggle_url,
50 update_url_query,
51 uppercase_escape,
52 url_or_none,
53 urlencode_postdata,
54 urljoin,
55 )
56
57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints used by the username/password flow below.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments that can never be a channel/user name; used by subclass
    # URL patterns to avoid matching e.g. youtube.com/feed/... as a channel.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs (typed prefixes like PL/UU/OLAK5uy_ plus the special
    # mix/watch-later/liked aliases RDMM, WL, LL, LM).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        # Force the English interface via the PREF cookie so that pages can be
        # scraped with stable (English) strings.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap raw video IDs into url_result dicts handled by YoutubeIE.
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        NOTE(review): this talks to Google's undocumented GlifWebSignIn
        endpoints; the positional-JSON request/response shapes below were
        reverse engineered and may break without notice.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Download failure is non-fatal here; treated like "cannot log in".
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow. f_req is the step-specific
            # positional-JSON payload; the rest mimics the browser form.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Strip everything before the first '[' — presumably an
                # anti-XSSI prefix on the JSON body.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account by username.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # Opaque account token required by the subsequent challenge requests.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        # A non-empty [0][5] entry signals a login error (e.g. bad password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # Step 3 (optional): handle an additional challenge (2FA etc.).
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token needed to address the TFA submission URL.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes are sometimes entered with the SMS "G-" prefix.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                # Same error-entry convention as the password step above.
                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges we cannot solve programmatically — tell the user
                # to resolve them in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Step 4: visit the CheckCookie URL so the session cookies get set,
        # then verify the redirect landed on the signed-in account page.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Copy the query dict so per-call mutations never leak into the
        # caller's dict, then delegate to the base implementation.
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        # Extract the ytInitialData JSON blob embedded in a watch page.
        # The lookbehind (?<=}) anchors the non-greedy match at the object's
        # closing brace. Returns None if the blob is absent or unparsable.
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        # Runs once before extraction: pick English UI, then try to log in.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal innertube client context sent with every _call_api request.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # ytInitialData / ytInitialPlayerResponse assignments embedded in pages,
    # plus a boundary pattern used to delimit the end of the JSON object.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        # POST to the youtubei (innertube) API endpoint `ep` with the default
        # WEB-client context merged with `query`. The key appears to be the
        # public web-client API key, not a user credential.
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        # Like _get_yt_initial_data but fatal: first try the boundary-anchored
        # pattern, then fall back to the bare assignment pattern.
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        # Parse the ytcfg.set({...}) page configuration; returns {} / None
        # rather than failing when it is missing or malformed.
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)
335
336 class YoutubeIE(YoutubeBaseInfoExtractor):
337 IE_DESC = 'YouTube.com'
338 _VALID_URL = r"""(?x)^
339 (
340 (?:https?://|//) # http(s):// or protocol-independent URL
341 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
342 (?:www\.)?deturl\.com/www\.youtube\.com/|
343 (?:www\.)?pwnyoutube\.com/|
344 (?:www\.)?hooktube\.com/|
345 (?:www\.)?yourepeat\.com/|
346 tube\.majestyc\.net/|
347 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
348 (?:(?:www|dev)\.)?invidio\.us/|
349 (?:(?:www|no)\.)?invidiou\.sh/|
350 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
351 (?:www\.)?invidious\.kabi\.tk/|
352 (?:www\.)?invidious\.13ad\.de/|
353 (?:www\.)?invidious\.mastodon\.host/|
354 (?:www\.)?invidious\.zapashcanon\.fr/|
355 (?:www\.)?invidious\.kavin\.rocks/|
356 (?:www\.)?invidious\.tube/|
357 (?:www\.)?invidiou\.site/|
358 (?:www\.)?invidious\.site/|
359 (?:www\.)?invidious\.xyz/|
360 (?:www\.)?invidious\.nixnet\.xyz/|
361 (?:www\.)?invidious\.drycat\.fr/|
362 (?:www\.)?tube\.poal\.co/|
363 (?:www\.)?tube\.connect\.cafe/|
364 (?:www\.)?vid\.wxzm\.sx/|
365 (?:www\.)?vid\.mint\.lgbt/|
366 (?:www\.)?yewtu\.be/|
367 (?:www\.)?yt\.elukerio\.org/|
368 (?:www\.)?yt\.lelux\.fi/|
369 (?:www\.)?invidious\.ggc-project\.de/|
370 (?:www\.)?yt\.maisputain\.ovh/|
371 (?:www\.)?invidious\.13ad\.de/|
372 (?:www\.)?invidious\.toot\.koeln/|
373 (?:www\.)?invidious\.fdn\.fr/|
374 (?:www\.)?watch\.nettohikari\.com/|
375 (?:www\.)?kgg2m7yk5aybusll\.onion/|
376 (?:www\.)?qklhadlycap4cnod\.onion/|
377 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
378 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
379 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
380 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
381 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
382 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
383 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
384 (?:.*?\#/)? # handle anchor (#/) redirect urls
385 (?: # the various things that can precede the ID:
386 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
387 |(?: # or the v= param in all its forms
388 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
389 (?:\?|\#!?) # the params delimiter ? or # or #!
390 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
391 v=
392 )
393 ))
394 |(?:
395 youtu\.be| # just youtu.be/xxxx
396 vid\.plus| # or vid.plus/xxxx
397 zwearz\.com/watch| # or zwearz.com/watch/xxxx
398 )/
399 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
400 )
401 )? # all until now is optional -> you can pass the naked ID
402 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
403 (?!.*?\blist=
404 (?:
405 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
406 WL # WL are handled by the watch later IE
407 )
408 )
409 (?(1).+)? # if we found the ID, everything can follow
410 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
411 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
412 _PLAYER_INFO_RE = (
413 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
414 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
415 )
416 _formats = {
417 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
418 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
419 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
420 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
421 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
422 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
423 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
424 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
425 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
426 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
427 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
429 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
430 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
431 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
432 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
433 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
434 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
435
436
437 # 3D videos
438 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
439 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
440 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
441 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
442 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
443 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
444 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
445
446 # Apple HTTP Live Streaming
447 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
448 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
449 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
450 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
451 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
452 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
453 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
455
456 # DASH mp4 video
457 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
458 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
459 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
460 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
461 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
462 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
463 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
467 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
468 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
469
470 # Dash mp4 audio
471 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
472 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
473 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
474 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
475 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
476 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
477 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
478
479 # Dash webm
480 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
481 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
482 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
483 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
484 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
485 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
487 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
488 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
489 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
490 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
491 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
492 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
496 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
498 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
499 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
500 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
502
503 # Dash webm audio
504 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
505 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
506
507 # Dash webm audio with opus inside
508 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
509 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
510 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
511
512 # RTMP (unnamed)
513 '_rtmp': {'protocol': 'rtmp'},
514
515 # av01 video only formats sometimes served with "unknown" codecs
516 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
517 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
518 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
519 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
520 }
521 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
522
523 _GEO_BYPASS = False
524
525 IE_NAME = 'youtube'
526 _TESTS = [
527 {
528 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
529 'info_dict': {
530 'id': 'BaW_jenozKc',
531 'ext': 'mp4',
532 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
533 'uploader': 'Philipp Hagemeister',
534 'uploader_id': 'phihag',
535 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
536 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
537 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
538 'upload_date': '20121002',
539 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
540 'categories': ['Science & Technology'],
541 'tags': ['youtube-dl'],
542 'duration': 10,
543 'view_count': int,
544 'like_count': int,
545 'dislike_count': int,
546 'start_time': 1,
547 'end_time': 9,
548 }
549 },
550 {
551 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
552 'note': 'Embed-only video (#1746)',
553 'info_dict': {
554 'id': 'yZIXLfi8CZQ',
555 'ext': 'mp4',
556 'upload_date': '20120608',
557 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
558 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
559 'uploader': 'SET India',
560 'uploader_id': 'setindia',
561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
562 'age_limit': 18,
563 }
564 },
565 {
566 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
567 'note': 'Use the first video ID in the URL',
568 'info_dict': {
569 'id': 'BaW_jenozKc',
570 'ext': 'mp4',
571 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
572 'uploader': 'Philipp Hagemeister',
573 'uploader_id': 'phihag',
574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
575 'upload_date': '20121002',
576 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
577 'categories': ['Science & Technology'],
578 'tags': ['youtube-dl'],
579 'duration': 10,
580 'view_count': int,
581 'like_count': int,
582 'dislike_count': int,
583 },
584 'params': {
585 'skip_download': True,
586 },
587 },
588 {
589 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
590 'note': '256k DASH audio (format 141) via DASH manifest',
591 'info_dict': {
592 'id': 'a9LDPn-MO4I',
593 'ext': 'm4a',
594 'upload_date': '20121002',
595 'uploader_id': '8KVIDEO',
596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
597 'description': '',
598 'uploader': '8KVIDEO',
599 'title': 'UHDTV TEST 8K VIDEO.mp4'
600 },
601 'params': {
602 'youtube_include_dash_manifest': True,
603 'format': '141',
604 },
605 'skip': 'format 141 not served anymore',
606 },
607 # DASH manifest with encrypted signature
608 {
609 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
610 'info_dict': {
611 'id': 'IB3lcPjvWLA',
612 'ext': 'm4a',
613 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
614 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
615 'duration': 244,
616 'uploader': 'AfrojackVEVO',
617 'uploader_id': 'AfrojackVEVO',
618 'upload_date': '20131011',
619 },
620 'params': {
621 'youtube_include_dash_manifest': True,
622 'format': '141/bestaudio[ext=m4a]',
623 },
624 },
625 # Controversy video
626 {
627 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
628 'info_dict': {
629 'id': 'T4XJQO3qol8',
630 'ext': 'mp4',
631 'duration': 219,
632 'upload_date': '20100909',
633 'uploader': 'Amazing Atheist',
634 'uploader_id': 'TheAmazingAtheist',
635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
636 'title': 'Burning Everyone\'s Koran',
637 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
638 }
639 },
640 # Normal age-gate video (embed allowed)
641 {
642 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
643 'info_dict': {
644 'id': 'HtVdAasjOgU',
645 'ext': 'mp4',
646 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
647 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
648 'duration': 142,
649 'uploader': 'The Witcher',
650 'uploader_id': 'WitcherGame',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
652 'upload_date': '20140605',
653 'age_limit': 18,
654 },
655 },
656 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
657 # YouTube Red ad is not captured for creator
658 {
659 'url': '__2ABJjxzNo',
660 'info_dict': {
661 'id': '__2ABJjxzNo',
662 'ext': 'mp4',
663 'duration': 266,
664 'upload_date': '20100430',
665 'uploader_id': 'deadmau5',
666 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
667 'creator': 'Dada Life, deadmau5',
668 'description': 'md5:12c56784b8032162bb936a5f76d55360',
669 'uploader': 'deadmau5',
670 'title': 'Deadmau5 - Some Chords (HD)',
671 'alt_title': 'This Machine Kills Some Chords',
672 },
673 'expected_warnings': [
674 'DASH manifest missing',
675 ]
676 },
677 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
678 {
679 'url': 'lqQg6PlCWgI',
680 'info_dict': {
681 'id': 'lqQg6PlCWgI',
682 'ext': 'mp4',
683 'duration': 6085,
684 'upload_date': '20150827',
685 'uploader_id': 'olympic',
686 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
687 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
688 'uploader': 'Olympic',
689 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
690 },
691 'params': {
692 'skip_download': 'requires avconv',
693 }
694 },
695 # Non-square pixels
696 {
697 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
698 'info_dict': {
699 'id': '_b-2C3KPAM0',
700 'ext': 'mp4',
701 'stretched_ratio': 16 / 9.,
702 'duration': 85,
703 'upload_date': '20110310',
704 'uploader_id': 'AllenMeow',
705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
706 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
707 'uploader': '孫ᄋᄅ',
708 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
709 },
710 },
711 # url_encoded_fmt_stream_map is empty string
712 {
713 'url': 'qEJwOuvDf7I',
714 'info_dict': {
715 'id': 'qEJwOuvDf7I',
716 'ext': 'webm',
717 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
718 'description': '',
719 'upload_date': '20150404',
720 'uploader_id': 'spbelect',
721 'uploader': 'Наблюдатели Петербурга',
722 },
723 'params': {
724 'skip_download': 'requires avconv',
725 },
726 'skip': 'This live event has ended.',
727 },
728 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
729 {
730 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
731 'info_dict': {
732 'id': 'FIl7x6_3R5Y',
733 'ext': 'webm',
734 'title': 'md5:7b81415841e02ecd4313668cde88737a',
735 'description': 'md5:116377fd2963b81ec4ce64b542173306',
736 'duration': 220,
737 'upload_date': '20150625',
738 'uploader_id': 'dorappi2000',
739 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
740 'uploader': 'dorappi2000',
741 'formats': 'mincount:31',
742 },
743 'skip': 'not actual anymore',
744 },
745 # DASH manifest with segment_list
746 {
747 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
748 'md5': '8ce563a1d667b599d21064e982ab9e31',
749 'info_dict': {
750 'id': 'CsmdDsKjzN8',
751 'ext': 'mp4',
752 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
753 'uploader': 'Airtek',
754 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
755 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
756 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
757 },
758 'params': {
759 'youtube_include_dash_manifest': True,
760 'format': '135', # bestvideo
761 },
762 'skip': 'This live event has ended.',
763 },
764 {
765 # Multifeed videos (multiple cameras), URL is for Main Camera
766 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
767 'info_dict': {
768 'id': 'jqWvoWXjCVs',
769 'title': 'teamPGP: Rocket League Noob Stream',
770 'description': 'md5:dc7872fb300e143831327f1bae3af010',
771 },
772 'playlist': [{
773 'info_dict': {
774 'id': 'jqWvoWXjCVs',
775 'ext': 'mp4',
776 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
777 'description': 'md5:dc7872fb300e143831327f1bae3af010',
778 'duration': 7335,
779 'upload_date': '20150721',
780 'uploader': 'Beer Games Beer',
781 'uploader_id': 'beergamesbeer',
782 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
783 'license': 'Standard YouTube License',
784 },
785 }, {
786 'info_dict': {
787 'id': '6h8e8xoXJzg',
788 'ext': 'mp4',
789 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
790 'description': 'md5:dc7872fb300e143831327f1bae3af010',
791 'duration': 7337,
792 'upload_date': '20150721',
793 'uploader': 'Beer Games Beer',
794 'uploader_id': 'beergamesbeer',
795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
796 'license': 'Standard YouTube License',
797 },
798 }, {
799 'info_dict': {
800 'id': 'PUOgX5z9xZw',
801 'ext': 'mp4',
802 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
803 'description': 'md5:dc7872fb300e143831327f1bae3af010',
804 'duration': 7337,
805 'upload_date': '20150721',
806 'uploader': 'Beer Games Beer',
807 'uploader_id': 'beergamesbeer',
808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
809 'license': 'Standard YouTube License',
810 },
811 }, {
812 'info_dict': {
813 'id': 'teuwxikvS5k',
814 'ext': 'mp4',
815 'title': 'teamPGP: Rocket League Noob Stream (zim)',
816 'description': 'md5:dc7872fb300e143831327f1bae3af010',
817 'duration': 7334,
818 'upload_date': '20150721',
819 'uploader': 'Beer Games Beer',
820 'uploader_id': 'beergamesbeer',
821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
822 'license': 'Standard YouTube License',
823 },
824 }],
825 'params': {
826 'skip_download': True,
827 },
828 'skip': 'This video is not available.',
829 },
830 {
831 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
832 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
833 'info_dict': {
834 'id': 'gVfLd0zydlo',
835 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
836 },
837 'playlist_count': 2,
838 'skip': 'Not multifeed anymore',
839 },
840 {
841 'url': 'https://vid.plus/FlRa-iH7PGw',
842 'only_matching': True,
843 },
844 {
845 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
846 'only_matching': True,
847 },
848 {
849 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
850 # Also tests cut-off URL expansion in video description (see
851 # https://github.com/ytdl-org/youtube-dl/issues/1892,
852 # https://github.com/ytdl-org/youtube-dl/issues/8164)
853 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
854 'info_dict': {
855 'id': 'lsguqyKfVQg',
856 'ext': 'mp4',
857 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
858 'alt_title': 'Dark Walk - Position Music',
859 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
860 'duration': 133,
861 'upload_date': '20151119',
862 'uploader_id': 'IronSoulElf',
863 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
864 'uploader': 'IronSoulElf',
865 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
866 'track': 'Dark Walk - Position Music',
867 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
868 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
869 },
870 'params': {
871 'skip_download': True,
872 },
873 },
874 {
875 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
876 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
877 'only_matching': True,
878 },
879 {
880 # Video with yt:stretch=17:0
881 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
882 'info_dict': {
883 'id': 'Q39EVAstoRM',
884 'ext': 'mp4',
885 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
886 'description': 'md5:ee18a25c350637c8faff806845bddee9',
887 'upload_date': '20151107',
888 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
889 'uploader': 'CH GAMER DROID',
890 },
891 'params': {
892 'skip_download': True,
893 },
894 'skip': 'This video does not exist.',
895 },
896 {
897 # Video licensed under Creative Commons
898 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
899 'info_dict': {
900 'id': 'M4gD1WSo5mA',
901 'ext': 'mp4',
902 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
903 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
904 'duration': 721,
905 'upload_date': '20150127',
906 'uploader_id': 'BerkmanCenter',
907 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
908 'uploader': 'The Berkman Klein Center for Internet & Society',
909 'license': 'Creative Commons Attribution license (reuse allowed)',
910 },
911 'params': {
912 'skip_download': True,
913 },
914 },
915 {
916 # Channel-like uploader_url
917 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
918 'info_dict': {
919 'id': 'eQcmzGIKrzg',
920 'ext': 'mp4',
921 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
922 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
923 'duration': 4060,
924 'upload_date': '20151119',
925 'uploader': 'Bernie Sanders',
926 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
927 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
928 'license': 'Creative Commons Attribution license (reuse allowed)',
929 },
930 'params': {
931 'skip_download': True,
932 },
933 },
934 {
935 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
936 'only_matching': True,
937 },
938 {
939 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
940 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
941 'only_matching': True,
942 },
943 {
944 # Rental video preview
945 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
946 'info_dict': {
947 'id': 'uGpuVWrhIzE',
948 'ext': 'mp4',
949 'title': 'Piku - Trailer',
950 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
951 'upload_date': '20150811',
952 'uploader': 'FlixMatrix',
953 'uploader_id': 'FlixMatrixKaravan',
954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
955 'license': 'Standard YouTube License',
956 },
957 'params': {
958 'skip_download': True,
959 },
960 'skip': 'This video is not available.',
961 },
962 {
963 # YouTube Red video with episode data
964 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
965 'info_dict': {
966 'id': 'iqKdEhx-dD4',
967 'ext': 'mp4',
968 'title': 'Isolation - Mind Field (Ep 1)',
969 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
970 'duration': 2085,
971 'upload_date': '20170118',
972 'uploader': 'Vsauce',
973 'uploader_id': 'Vsauce',
974 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
975 'series': 'Mind Field',
976 'season_number': 1,
977 'episode_number': 1,
978 },
979 'params': {
980 'skip_download': True,
981 },
982 'expected_warnings': [
983 'Skipping DASH manifest',
984 ],
985 },
986 {
987 # The following content has been identified by the YouTube community
988 # as inappropriate or offensive to some audiences.
989 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
990 'info_dict': {
991 'id': '6SJNVb0GnPI',
992 'ext': 'mp4',
993 'title': 'Race Differences in Intelligence',
994 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
995 'duration': 965,
996 'upload_date': '20140124',
997 'uploader': 'New Century Foundation',
998 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
1004 },
1005 {
1006 # itag 212
1007 'url': '1t24XAntNCY',
1008 'only_matching': True,
1009 },
1010 {
1011 # geo restricted to JP
1012 'url': 'sJL6WA-aGkQ',
1013 'only_matching': True,
1014 },
1015 {
1016 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1017 'only_matching': True,
1018 },
1019 {
1020 # DRM protected
1021 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1022 'only_matching': True,
1023 },
1024 {
1025 # Video with unsupported adaptive stream type formats
1026 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1027 'info_dict': {
1028 'id': 'Z4Vy8R84T1U',
1029 'ext': 'mp4',
1030 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1031 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1032 'duration': 433,
1033 'upload_date': '20130923',
1034 'uploader': 'Amelia Putri Harwita',
1035 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1037 'formats': 'maxcount:10',
1038 },
1039 'params': {
1040 'skip_download': True,
1041 'youtube_include_dash_manifest': False,
1042 },
1043 'skip': 'not actual anymore',
1044 },
1045 {
1046 # Youtube Music Auto-generated description
1047 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1048 'info_dict': {
1049 'id': 'MgNrAu2pzNs',
1050 'ext': 'mp4',
1051 'title': 'Voyeur Girl',
1052 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1053 'upload_date': '20190312',
1054 'uploader': 'Stephen - Topic',
1055 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1056 'artist': 'Stephen',
1057 'track': 'Voyeur Girl',
1058 'album': 'it\'s too much love to know my dear',
1059 'release_date': '20190313',
1060 'release_year': 2019,
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
1066 {
1067 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1068 'only_matching': True,
1069 },
1070 {
1071 # invalid -> valid video id redirection
1072 'url': 'DJztXj2GPfl',
1073 'info_dict': {
1074 'id': 'DJztXj2GPfk',
1075 'ext': 'mp4',
1076 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1077 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1078 'upload_date': '20090125',
1079 'uploader': 'Prochorowka',
1080 'uploader_id': 'Prochorowka',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1082 'artist': 'Panjabi MC',
1083 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1084 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1085 },
1086 'params': {
1087 'skip_download': True,
1088 },
1089 },
1090 {
1091 # empty description results in an empty string
1092 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1093 'info_dict': {
1094 'id': 'x41yOUIvK2k',
1095 'ext': 'mp4',
1096 'title': 'IMG 3456',
1097 'description': '',
1098 'upload_date': '20170613',
1099 'uploader_id': 'ElevageOrVert',
1100 'uploader': 'ElevageOrVert',
1101 },
1102 'params': {
1103 'skip_download': True,
1104 },
1105 },
1106 {
1107 # with '};' inside yt initial data (see [1])
1108 # see [2] for an example with '};' inside ytInitialPlayerResponse
1109 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1110 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1111 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1112 'info_dict': {
1113 'id': 'CHqg6qOn4no',
1114 'ext': 'mp4',
1115 'title': 'Part 77 Sort a list of simple types in c#',
1116 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1117 'upload_date': '20130831',
1118 'uploader_id': 'kudvenkat',
1119 'uploader': 'kudvenkat',
1120 },
1121 'params': {
1122 'skip_download': True,
1123 },
1124 },
1125 {
1126 # another example of '};' in ytInitialData
1127 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1128 'only_matching': True,
1129 },
1130 {
1131 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1132 'only_matching': True,
1133 },
1134 ]
1135
1136 def __init__(self, *args, **kwargs):
1137 super(YoutubeIE, self).__init__(*args, **kwargs)
1138 self._player_cache = {}
1139
1140 def report_video_info_webpage_download(self, video_id):
1141 """Report attempt to download video info webpage."""
1142 self.to_screen('%s: Downloading video info webpage' % video_id)
1143
1144 def report_information_extraction(self, video_id):
1145 """Report attempt to extract video information."""
1146 self.to_screen('%s: Extracting video information' % video_id)
1147
1148 def report_unavailable_format(self, video_id, format):
1149 """Report extracted video URL."""
1150 self.to_screen('%s: Format %s not available' % (video_id, format))
1151
1152 def report_rtmp_download(self):
1153 """Indicate the download will use the RTMP protocol."""
1154 self.to_screen('RTMP download detected')
1155
1156 def _signature_cache_id(self, example_sig):
1157 """ Return a string representation of a signature """
1158 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1159
1160 @classmethod
1161 def _extract_player_info(cls, player_url):
1162 for player_re in cls._PLAYER_INFO_RE:
1163 id_m = re.search(player_re, player_url)
1164 if id_m:
1165 break
1166 else:
1167 raise ExtractorError('Cannot identify player %r' % player_url)
1168 return id_m.group('ext'), id_m.group('id')
1169
1170 def _extract_signature_function(self, video_id, player_url, example_sig):
1171 player_type, player_id = self._extract_player_info(player_url)
1172
1173 # Read from filesystem cache
1174 func_id = '%s_%s_%s' % (
1175 player_type, player_id, self._signature_cache_id(example_sig))
1176 assert os.path.basename(func_id) == func_id
1177
1178 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1179 if cache_spec is not None:
1180 return lambda s: ''.join(s[i] for i in cache_spec)
1181
1182 download_note = (
1183 'Downloading player %s' % player_url
1184 if self._downloader.params.get('verbose') else
1185 'Downloading %s player %s' % (player_type, player_id)
1186 )
1187 if player_type == 'js':
1188 code = self._download_webpage(
1189 player_url, video_id,
1190 note=download_note,
1191 errnote='Download of %s failed' % player_url)
1192 res = self._parse_sig_js(code)
1193 elif player_type == 'swf':
1194 urlh = self._request_webpage(
1195 player_url, video_id,
1196 note=download_note,
1197 errnote='Download of %s failed' % player_url)
1198 code = urlh.read()
1199 res = self._parse_sig_swf(code)
1200 else:
1201 assert False, 'Invalid player type %r' % player_type
1202
1203 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1204 cache_res = res(test_string)
1205 cache_spec = [ord(c) for c in cache_res]
1206
1207 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1208 return res
1209
    def _print_sig_code(self, func, example_sig):
        """Print Python source code that reproduces *func* as slicing code.

        Used with the youtube_print_sig_code option so a deciphered
        permutation can be turned into a hardcoded signature routine.
        """
        def gen_sig_code(idxs):
            # Yields expressions like 's[3]' or 's[8:2:-1]', compressing runs
            # of indices with a constant step of +/-1 into slices.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, emitting a slice when a run ends.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the final open run.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Apply func to a probe string to recover the index permutation.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1248
    def _parse_sig_js(self, jscode):
        """Locate the signature function inside the player JS and wrap it.

        Returns a callable mapping an encrypted signature string to its
        deciphered form, executed via JSInterpreter.
        """
        # Patterns are tried in order, most specific first; the obsolete ones
        # are kept to support older player versions.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as its single argument.
        return lambda s: initial_function([s])
1269
1270 def _parse_sig_swf(self, file_contents):
1271 swfi = SWFInterpreter(file_contents)
1272 TARGET_CLASSNAME = 'SignatureDecipher'
1273 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1274 initial_function = swfi.extract_function(searched_class, 'decipher')
1275 return lambda s: initial_function([s])
1276
1277 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1278 """Turn the encrypted s field into a working signature"""
1279
1280 if player_url is None:
1281 raise ExtractorError('Cannot decrypt signature without player_url')
1282
1283 if player_url.startswith('//'):
1284 player_url = 'https:' + player_url
1285 elif not re.match(r'https?://', player_url):
1286 player_url = compat_urlparse.urljoin(
1287 'https://www.youtube.com', player_url)
1288 try:
1289 player_id = (player_url, self._signature_cache_id(s))
1290 if player_id not in self._player_cache:
1291 func = self._extract_signature_function(
1292 video_id, player_url, s
1293 )
1294 self._player_cache[player_id] = func
1295 func = self._player_cache[player_id]
1296 if self._downloader.params.get('youtube_print_sig_code'):
1297 self._print_sig_code(func, s)
1298 return func(s)
1299 except Exception as e:
1300 tb = traceback.format_exc()
1301 raise ExtractorError(
1302 'Signature extraction failed: ' + tb, cause=e)
1303
1304 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1305 try:
1306 subs_doc = self._download_xml(
1307 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1308 video_id, note=False)
1309 except ExtractorError as err:
1310 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1311 return {}
1312
1313 sub_lang_list = {}
1314 for track in subs_doc.findall('track'):
1315 lang = track.attrib['lang_code']
1316 if lang in sub_lang_list:
1317 continue
1318 sub_formats = []
1319 for ext in self._SUBTITLE_FORMATS:
1320 params = compat_urllib_parse_urlencode({
1321 'lang': lang,
1322 'v': video_id,
1323 'fmt': ext,
1324 'name': track.attrib['name'].encode('utf-8'),
1325 })
1326 sub_formats.append({
1327 'url': 'https://www.youtube.com/api/timedtext?' + params,
1328 'ext': ext,
1329 })
1330 sub_lang_list[lang] = sub_formats
1331 if has_live_chat_replay:
1332 sub_lang_list['live_chat'] = [
1333 {
1334 'video_id': video_id,
1335 'ext': 'json',
1336 'protocol': 'youtube_live_chat_replay',
1337 },
1338 ]
1339 if not sub_lang_list:
1340 self._downloader.report_warning('video doesn\'t have subtitles')
1341 return {}
1342 return sub_lang_list
1343
1344 def _get_ytplayer_config(self, video_id, webpage):
1345 patterns = (
1346 # User data may contain arbitrary character sequences that may affect
1347 # JSON extraction with regex, e.g. when '};' is contained the second
1348 # regex won't capture the whole JSON. Yet working around by trying more
1349 # concrete regex first keeping in mind proper quoted string handling
1350 # to be implemented in future that will replace this workaround (see
1351 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1352 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1353 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1354 r';ytplayer\.config\s*=\s*({.+?});',
1355 )
1356 config = self._search_regex(
1357 patterns, webpage, 'ytplayer.config', default=None)
1358 if config:
1359 return self._parse_json(
1360 uppercase_escape(config), video_id, fatal=False)
1361
    def _get_automatic_captions(self, video_id, player_response, player_config):
        """Return automatic (ASR) captions as {language code: [format dicts]}.

        Both player_response and player_config come from the watch page and
        either may be missing; when neither is available (or extraction fails)
        a warning is emitted and {} is returned.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not (player_response or player_config):
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config.get('args') if player_config else {}
            caption_url = args.get('ttsurl')
            if caption_url:
                # Legacy flow: ttsurl + timestamp based caption listing.
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the query of
                # the base caption URL (only tlang/fmt vary per entry).
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1458
1459 def _mark_watched(self, video_id, video_info, player_response):
1460 playback_url = url_or_none(try_get(
1461 player_response,
1462 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1463 video_info, lambda x: x['videostats_playback_base_url'][0]))
1464 if not playback_url:
1465 return
1466 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1467 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1468
1469 # cpn generation algorithm is reverse engineered from base.js.
1470 # In fact it works even with dummy cpn.
1471 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1472 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1473
1474 qs.update({
1475 'ver': ['2'],
1476 'cpn': [cpn],
1477 })
1478 playback_url = compat_urlparse.urlunparse(
1479 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1480
1481 self._download_webpage(
1482 playback_url, video_id, 'Marking watched',
1483 'Unable to mark watched', fatal=False)
1484
1485 @staticmethod
1486 def _extract_urls(webpage):
1487 # Embedded YouTube player
1488 entries = [
1489 unescapeHTML(mobj.group('url'))
1490 for mobj in re.finditer(r'''(?x)
1491 (?:
1492 <iframe[^>]+?src=|
1493 data-video-url=|
1494 <embed[^>]+?src=|
1495 embedSWF\(?:\s*|
1496 <object[^>]+data=|
1497 new\s+SWFObject\(
1498 )
1499 (["\'])
1500 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1501 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1502 \1''', webpage)]
1503
1504 # lazyYT YouTube embed
1505 entries.extend(list(map(
1506 unescapeHTML,
1507 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1508
1509 # Wordpress "YouTube Video Importer" plugin
1510 matches = re.findall(r'''(?x)<div[^>]+
1511 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1512 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1513 entries.extend(m[-1] for m in matches)
1514
1515 return entries
1516
1517 @staticmethod
1518 def _extract_url(webpage):
1519 urls = YoutubeIE._extract_urls(webpage)
1520 return urls[0] if urls else None
1521
1522 @classmethod
1523 def extract_id(cls, url):
1524 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1525 if mobj is None:
1526 raise ExtractorError('Invalid URL: %s' % url)
1527 video_id = mobj.group(2)
1528 return video_id
1529
1530 def _extract_chapters_from_json(self, webpage, video_id, duration):
1531 if not webpage:
1532 return
1533 data = self._extract_yt_initial_data(video_id, webpage)
1534 if not data or not isinstance(data, dict):
1535 return
1536 chapters_list = try_get(
1537 data,
1538 lambda x: x['playerOverlays']
1539 ['playerOverlayRenderer']
1540 ['decoratedPlayerBarRenderer']
1541 ['decoratedPlayerBarRenderer']
1542 ['playerBar']
1543 ['chapteredPlayerBarRenderer']
1544 ['chapters'],
1545 list)
1546 if not chapters_list:
1547 return
1548
1549 def chapter_time(chapter):
1550 return float_or_none(
1551 try_get(
1552 chapter,
1553 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1554 int),
1555 scale=1000)
1556 chapters = []
1557 for next_num, chapter in enumerate(chapters_list, start=1):
1558 start_time = chapter_time(chapter)
1559 if start_time is None:
1560 continue
1561 end_time = (chapter_time(chapters_list[next_num])
1562 if next_num < len(chapters_list) else duration)
1563 if end_time is None:
1564 continue
1565 title = try_get(
1566 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1567 compat_str)
1568 chapters.append({
1569 'start_time': start_time,
1570 'end_time': end_time,
1571 'title': title,
1572 })
1573 return chapters
1574
1575 @staticmethod
1576 def _extract_chapters_from_description(description, duration):
1577 if not description:
1578 return None
1579 chapter_lines = re.findall(
1580 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1581 description)
1582 if not chapter_lines:
1583 return None
1584 chapters = []
1585 for next_num, (chapter_line, time_point) in enumerate(
1586 chapter_lines, start=1):
1587 start_time = parse_duration(time_point)
1588 if start_time is None:
1589 continue
1590 if start_time > duration:
1591 break
1592 end_time = (duration if next_num == len(chapter_lines)
1593 else parse_duration(chapter_lines[next_num][1]))
1594 if end_time is None:
1595 continue
1596 if end_time > duration:
1597 end_time = duration
1598 if start_time > end_time:
1599 break
1600 chapter_title = re.sub(
1601 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1602 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1603 chapters.append({
1604 'start_time': start_time,
1605 'end_time': end_time,
1606 'title': chapter_title,
1607 })
1608 return chapters
1609
1610 def _extract_chapters(self, webpage, description, video_id, duration):
1611 return (self._extract_chapters_from_json(webpage, video_id, duration)
1612 or self._extract_chapters_from_description(description, duration))
1613
1614 def _real_extract(self, url):
1615 url, smuggled_data = unsmuggle_url(url, {})
1616
1617 proto = (
1618 'http' if self._downloader.params.get('prefer_insecure', False)
1619 else 'https')
1620
1621 start_time = None
1622 end_time = None
1623 parsed_url = compat_urllib_parse_urlparse(url)
1624 for component in [parsed_url.fragment, parsed_url.query]:
1625 query = compat_parse_qs(component)
1626 if start_time is None and 't' in query:
1627 start_time = parse_duration(query['t'][0])
1628 if start_time is None and 'start' in query:
1629 start_time = parse_duration(query['start'][0])
1630 if end_time is None and 'end' in query:
1631 end_time = parse_duration(query['end'][0])
1632
1633 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1634 mobj = re.search(self._NEXT_URL_RE, url)
1635 if mobj:
1636 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1637 video_id = self.extract_id(url)
1638
1639 # Get video webpage
1640 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1641 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1642
1643 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1644 video_id = qs.get('v', [None])[0] or video_id
1645
1646 # Attempt to extract SWF player URL
1647 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1648 if mobj is not None:
1649 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1650 else:
1651 player_url = None
1652
1653 dash_mpds = []
1654
1655 def add_dash_mpd(video_info):
1656 dash_mpd = video_info.get('dashmpd')
1657 if dash_mpd and dash_mpd[0] not in dash_mpds:
1658 dash_mpds.append(dash_mpd[0])
1659
1660 def add_dash_mpd_pr(pl_response):
1661 dash_mpd = url_or_none(try_get(
1662 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1663 compat_str))
1664 if dash_mpd and dash_mpd not in dash_mpds:
1665 dash_mpds.append(dash_mpd)
1666
1667 is_live = None
1668 view_count = None
1669
1670 def extract_view_count(v_info):
1671 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1672
1673 def extract_player_response(player_response, video_id):
1674 pl_response = str_or_none(player_response)
1675 if not pl_response:
1676 return
1677 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1678 if isinstance(pl_response, dict):
1679 add_dash_mpd_pr(pl_response)
1680 return pl_response
1681
1682 def extract_embedded_config(embed_webpage, video_id):
1683 embedded_config = self._search_regex(
1684 r'setConfig\(({.*})\);',
1685 embed_webpage, 'ytInitialData', default=None)
1686 if embedded_config:
1687 return embedded_config
1688
1689 player_response = {}
1690
1691 # Get video info
1692 video_info = {}
1693 embed_webpage = None
1694 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1695 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1696 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1697 age_gate = True
1698 # We simulate the access to the video from www.youtube.com/v/{video_id}
1699 # this can be viewed without login into Youtube
1700 url = proto + '://www.youtube.com/embed/%s' % video_id
1701 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1702 ext = extract_embedded_config(embed_webpage, video_id)
1703 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1704 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1705 if not playable_in_embed:
1706 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1707 playable_in_embed = ''
1708 else:
1709 playable_in_embed = playable_in_embed.group('playableinEmbed')
1710 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1711 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1712 if playable_in_embed == 'false':
1713 '''
1714 # TODO apply this patch when Support for Python 2.6(!) and above drops
1715 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1716 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1717 '''
1718 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1719 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1720 age_gate = False
1721 # Try looking directly into the video webpage
1722 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1723 if ytplayer_config:
1724 args = ytplayer_config.get("args")
1725 if args is not None:
1726 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1727 # Convert to the same format returned by compat_parse_qs
1728 video_info = dict((k, [v]) for k, v in args.items())
1729 add_dash_mpd(video_info)
1730 # Rental video is not rented but preview is available (e.g.
1731 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1732 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1733 if not video_info and args.get('ypc_vid'):
1734 return self.url_result(
1735 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1736 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1737 is_live = True
1738 if not player_response:
1739 player_response = extract_player_response(args.get('player_response'), video_id)
1740 elif not player_response:
1741 player_response = ytplayer_config
1742 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1743 add_dash_mpd_pr(player_response)
1744 else:
1745 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1746 else:
1747 data = compat_urllib_parse_urlencode({
1748 'video_id': video_id,
1749 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1750 'sts': self._search_regex(
1751 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1752 })
1753 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1754 try:
1755 video_info_webpage = self._download_webpage(
1756 video_info_url, video_id,
1757 note='Refetching age-gated info webpage',
1758 errnote='unable to download video info webpage')
1759 except ExtractorError:
1760 video_info_webpage = None
1761 if video_info_webpage:
1762 video_info = compat_parse_qs(video_info_webpage)
1763 pl_response = video_info.get('player_response', [None])[0]
1764 player_response = extract_player_response(pl_response, video_id)
1765 add_dash_mpd(video_info)
1766 view_count = extract_view_count(video_info)
1767 else:
1768 age_gate = False
1769 # Try looking directly into the video webpage
1770 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1771 if ytplayer_config:
1772 args = ytplayer_config.get('args', {})
1773 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1774 # Convert to the same format returned by compat_parse_qs
1775 video_info = dict((k, [v]) for k, v in args.items())
1776 add_dash_mpd(video_info)
1777 # Rental video is not rented but preview is available (e.g.
1778 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1779 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1780 if not video_info and args.get('ypc_vid'):
1781 return self.url_result(
1782 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1783 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1784 is_live = True
1785 if not player_response:
1786 player_response = extract_player_response(args.get('player_response'), video_id)
1787 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1788 add_dash_mpd_pr(player_response)
1789
1790 if not video_info and not player_response:
1791 player_response = extract_player_response(
1792 self._search_regex(
1793 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1794 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1795 'initial player response', default='{}'),
1796 video_id)
1797
1798 def extract_unavailable_message():
1799 messages = []
1800 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1801 msg = self._html_search_regex(
1802 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1803 video_webpage, 'unavailable %s' % kind, default=None)
1804 if msg:
1805 messages.append(msg)
1806 if messages:
1807 return '\n'.join(messages)
1808
1809 if not video_info and not player_response:
1810 unavailable_message = extract_unavailable_message()
1811 if not unavailable_message:
1812 unavailable_message = 'Unable to extract video data'
1813 raise ExtractorError(
1814 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1815
1816 if not isinstance(video_info, dict):
1817 video_info = {}
1818
1819 video_details = try_get(
1820 player_response, lambda x: x['videoDetails'], dict) or {}
1821
1822 microformat = try_get(
1823 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1824
1825 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1826 if not video_title:
1827 self._downloader.report_warning('Unable to extract video title')
1828 video_title = '_'
1829
1830 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1831 if video_description:
1832
1833 def replace_url(m):
1834 redir_url = compat_urlparse.urljoin(url, m.group(1))
1835 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1836 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1837 qs = compat_parse_qs(parsed_redir_url.query)
1838 q = qs.get('q')
1839 if q and q[0]:
1840 return q[0]
1841 return redir_url
1842
1843 description_original = video_description = re.sub(r'''(?x)
1844 <a\s+
1845 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1846 (?:title|href)="([^"]+)"\s+
1847 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1848 class="[^"]*"[^>]*>
1849 [^<]+\.{3}\s*
1850 </a>
1851 ''', replace_url, video_description)
1852 video_description = clean_html(video_description)
1853 else:
1854 video_description = video_details.get('shortDescription')
1855 if video_description is None:
1856 video_description = self._html_search_meta('description', video_webpage)
1857
1858 if not smuggled_data.get('force_singlefeed', False):
1859 if not self._downloader.params.get('noplaylist'):
1860 multifeed_metadata_list = try_get(
1861 player_response,
1862 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1863 compat_str) or try_get(
1864 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1865 if multifeed_metadata_list:
1866 entries = []
1867 feed_ids = []
1868 for feed in multifeed_metadata_list.split(','):
1869 # Unquote should take place before split on comma (,) since textual
1870 # fields may contain comma as well (see
1871 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1872 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1873
1874 def feed_entry(name):
1875 return try_get(feed_data, lambda x: x[name][0], compat_str)
1876
1877 feed_id = feed_entry('id')
1878 if not feed_id:
1879 continue
1880 feed_title = feed_entry('title')
1881 title = video_title
1882 if feed_title:
1883 title += ' (%s)' % feed_title
1884 entries.append({
1885 '_type': 'url_transparent',
1886 'ie_key': 'Youtube',
1887 'url': smuggle_url(
1888 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1889 {'force_singlefeed': True}),
1890 'title': title,
1891 })
1892 feed_ids.append(feed_id)
1893 self.to_screen(
1894 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1895 % (', '.join(feed_ids), video_id))
1896 return self.playlist_result(entries, video_id, video_title, video_description)
1897 else:
1898 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1899
1900 if view_count is None:
1901 view_count = extract_view_count(video_info)
1902 if view_count is None and video_details:
1903 view_count = int_or_none(video_details.get('viewCount'))
1904 if view_count is None and microformat:
1905 view_count = int_or_none(microformat.get('viewCount'))
1906
1907 if is_live is None:
1908 is_live = bool_or_none(video_details.get('isLive'))
1909
1910 has_live_chat_replay = False
1911 if not is_live:
1912 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1913 try:
1914 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1915 has_live_chat_replay = True
1916 except (KeyError, IndexError, TypeError):
1917 pass
1918
1919 # Check for "rental" videos
1920 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1921 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1922
1923 def _extract_filesize(media_url):
1924 return int_or_none(self._search_regex(
1925 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1926
1927 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1928 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1929
1930 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1931 self.report_rtmp_download()
1932 formats = [{
1933 'format_id': '_rtmp',
1934 'protocol': 'rtmp',
1935 'url': video_info['conn'][0],
1936 'player_url': player_url,
1937 }]
1938 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1939 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1940 if 'rtmpe%3Dyes' in encoded_url_map:
1941 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1942 formats = []
1943 formats_spec = {}
1944 fmt_list = video_info.get('fmt_list', [''])[0]
1945 if fmt_list:
1946 for fmt in fmt_list.split(','):
1947 spec = fmt.split('/')
1948 if len(spec) > 1:
1949 width_height = spec[1].split('x')
1950 if len(width_height) == 2:
1951 formats_spec[spec[0]] = {
1952 'resolution': spec[1],
1953 'width': int_or_none(width_height[0]),
1954 'height': int_or_none(width_height[1]),
1955 }
1956 for fmt in streaming_formats:
1957 itag = str_or_none(fmt.get('itag'))
1958 if not itag:
1959 continue
1960 quality = fmt.get('quality')
1961 quality_label = fmt.get('qualityLabel') or quality
1962 formats_spec[itag] = {
1963 'asr': int_or_none(fmt.get('audioSampleRate')),
1964 'filesize': int_or_none(fmt.get('contentLength')),
1965 'format_note': quality_label,
1966 'fps': int_or_none(fmt.get('fps')),
1967 'height': int_or_none(fmt.get('height')),
1968 # bitrate for itag 43 is always 2147483647
1969 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1970 'width': int_or_none(fmt.get('width')),
1971 }
1972
1973 for fmt in streaming_formats:
1974 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1975 continue
1976 url = url_or_none(fmt.get('url'))
1977
1978 if not url:
1979 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1980 if not cipher:
1981 continue
1982 url_data = compat_parse_qs(cipher)
1983 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1984 if not url:
1985 continue
1986 else:
1987 cipher = None
1988 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1989
1990 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1991 # Unsupported FORMAT_STREAM_TYPE_OTF
1992 if stream_type == 3:
1993 continue
1994
1995 format_id = fmt.get('itag') or url_data['itag'][0]
1996 if not format_id:
1997 continue
1998 format_id = compat_str(format_id)
1999
2000 if cipher:
2001 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2002 ASSETS_RE = (
2003 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2004 r'"jsUrl"\s*:\s*("[^"]+")',
2005 r'"assets":.+?"js":\s*("[^"]+")')
2006 jsplayer_url_json = self._search_regex(
2007 ASSETS_RE,
2008 embed_webpage if age_gate else video_webpage,
2009 'JS player URL (1)', default=None)
2010 if not jsplayer_url_json and not age_gate:
2011 # We need the embed website after all
2012 if embed_webpage is None:
2013 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2014 embed_webpage = self._download_webpage(
2015 embed_url, video_id, 'Downloading embed webpage')
2016 jsplayer_url_json = self._search_regex(
2017 ASSETS_RE, embed_webpage, 'JS player URL')
2018
2019 player_url = json.loads(jsplayer_url_json)
2020 if player_url is None:
2021 player_url_json = self._search_regex(
2022 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2023 video_webpage, 'age gate player URL')
2024 player_url = json.loads(player_url_json)
2025
2026 if 'sig' in url_data:
2027 url += '&signature=' + url_data['sig'][0]
2028 elif 's' in url_data:
2029 encrypted_sig = url_data['s'][0]
2030
2031 if self._downloader.params.get('verbose'):
2032 if player_url is None:
2033 player_desc = 'unknown'
2034 else:
2035 player_type, player_version = self._extract_player_info(player_url)
2036 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2037 parts_sizes = self._signature_cache_id(encrypted_sig)
2038 self.to_screen('{%s} signature length %s, %s' %
2039 (format_id, parts_sizes, player_desc))
2040
2041 signature = self._decrypt_signature(
2042 encrypted_sig, video_id, player_url, age_gate)
2043 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2044 url += '&%s=%s' % (sp, signature)
2045 if 'ratebypass' not in url:
2046 url += '&ratebypass=yes'
2047
2048 dct = {
2049 'format_id': format_id,
2050 'url': url,
2051 'player_url': player_url,
2052 }
2053 if format_id in self._formats:
2054 dct.update(self._formats[format_id])
2055 if format_id in formats_spec:
2056 dct.update(formats_spec[format_id])
2057
2058 # Some itags are not included in DASH manifest thus corresponding formats will
2059 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2060 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2061 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2062 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2063
2064 if width is None:
2065 width = int_or_none(fmt.get('width'))
2066 if height is None:
2067 height = int_or_none(fmt.get('height'))
2068
2069 filesize = int_or_none(url_data.get(
2070 'clen', [None])[0]) or _extract_filesize(url)
2071
2072 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2073 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2074
2075 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2076 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2077 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2078
2079 more_fields = {
2080 'filesize': filesize,
2081 'tbr': tbr,
2082 'width': width,
2083 'height': height,
2084 'fps': fps,
2085 'format_note': quality_label or quality,
2086 }
2087 for key, value in more_fields.items():
2088 if value:
2089 dct[key] = value
2090 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2091 if type_:
2092 type_split = type_.split(';')
2093 kind_ext = type_split[0].split('/')
2094 if len(kind_ext) == 2:
2095 kind, _ = kind_ext
2096 dct['ext'] = mimetype2ext(type_split[0])
2097 if kind in ('audio', 'video'):
2098 codecs = None
2099 for mobj in re.finditer(
2100 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2101 if mobj.group('key') == 'codecs':
2102 codecs = mobj.group('val')
2103 break
2104 if codecs:
2105 dct.update(parse_codecs(codecs))
2106 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2107 dct['downloader_options'] = {
2108 # Youtube throttles chunks >~10M
2109 'http_chunk_size': 10485760,
2110 }
2111 formats.append(dct)
2112 else:
2113 manifest_url = (
2114 url_or_none(try_get(
2115 player_response,
2116 lambda x: x['streamingData']['hlsManifestUrl'],
2117 compat_str))
2118 or url_or_none(try_get(
2119 video_info, lambda x: x['hlsvp'][0], compat_str)))
2120 if manifest_url:
2121 formats = []
2122 m3u8_formats = self._extract_m3u8_formats(
2123 manifest_url, video_id, 'mp4', fatal=False)
2124 for a_format in m3u8_formats:
2125 itag = self._search_regex(
2126 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2127 if itag:
2128 a_format['format_id'] = itag
2129 if itag in self._formats:
2130 dct = self._formats[itag].copy()
2131 dct.update(a_format)
2132 a_format = dct
2133 a_format['player_url'] = player_url
2134 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2135 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2136 if self._downloader.params.get('youtube_include_hls_manifest', True):
2137 formats.append(a_format)
2138 else:
2139 error_message = extract_unavailable_message()
2140 if not error_message:
2141 reason_list = try_get(
2142 player_response,
2143 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2144 list) or []
2145 for reason in reason_list:
2146 if not isinstance(reason, dict):
2147 continue
2148 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2149 if reason_text:
2150 if not error_message:
2151 error_message = ''
2152 error_message += reason_text
2153 if error_message:
2154 error_message = clean_html(error_message)
2155 if not error_message:
2156 error_message = clean_html(try_get(
2157 player_response, lambda x: x['playabilityStatus']['reason'],
2158 compat_str))
2159 if not error_message:
2160 error_message = clean_html(
2161 try_get(video_info, lambda x: x['reason'][0], compat_str))
2162 if error_message:
2163 raise ExtractorError(error_message, expected=True)
2164 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2165
2166 # uploader
2167 video_uploader = try_get(
2168 video_info, lambda x: x['author'][0],
2169 compat_str) or str_or_none(video_details.get('author'))
2170 if video_uploader:
2171 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2172 else:
2173 self._downloader.report_warning('unable to extract uploader name')
2174
2175 # uploader_id
2176 video_uploader_id = None
2177 video_uploader_url = None
2178 mobj = re.search(
2179 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2180 video_webpage)
2181 if mobj is not None:
2182 video_uploader_id = mobj.group('uploader_id')
2183 video_uploader_url = mobj.group('uploader_url')
2184 else:
2185 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2186 if owner_profile_url:
2187 video_uploader_id = self._search_regex(
2188 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2189 default=None)
2190 video_uploader_url = owner_profile_url
2191
2192 channel_id = (
2193 str_or_none(video_details.get('channelId'))
2194 or self._html_search_meta(
2195 'channelId', video_webpage, 'channel id', default=None)
2196 or self._search_regex(
2197 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2198 video_webpage, 'channel id', default=None, group='id'))
2199 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2200
2201 thumbnails = []
2202 thumbnails_list = try_get(
2203 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2204 for t in thumbnails_list:
2205 if not isinstance(t, dict):
2206 continue
2207 thumbnail_url = url_or_none(t.get('url'))
2208 if not thumbnail_url:
2209 continue
2210 thumbnails.append({
2211 'url': thumbnail_url,
2212 'width': int_or_none(t.get('width')),
2213 'height': int_or_none(t.get('height')),
2214 })
2215
2216 if not thumbnails:
2217 video_thumbnail = None
2218 # We try first to get a high quality image:
2219 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2220 video_webpage, re.DOTALL)
2221 if m_thumb is not None:
2222 video_thumbnail = m_thumb.group(1)
2223 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2224 if thumbnail_url:
2225 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2226 if video_thumbnail:
2227 thumbnails.append({'url': video_thumbnail})
2228
2229 # upload date
2230 upload_date = self._html_search_meta(
2231 'datePublished', video_webpage, 'upload date', default=None)
2232 if not upload_date:
2233 upload_date = self._search_regex(
2234 [r'(?s)id="eow-date.*?>(.*?)</span>',
2235 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2236 video_webpage, 'upload date', default=None)
2237 if not upload_date:
2238 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2239 upload_date = unified_strdate(upload_date)
2240
2241 video_license = self._html_search_regex(
2242 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2243 video_webpage, 'license', default=None)
2244
2245 m_music = re.search(
2246 r'''(?x)
2247 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2248 <ul[^>]*>\s*
2249 <li>(?P<title>.+?)
2250 by (?P<creator>.+?)
2251 (?:
2252 \(.+?\)|
2253 <a[^>]*
2254 (?:
2255 \bhref=["\']/red[^>]*>| # drop possible
2256 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2257 )
2258 .*?
2259 )?</li
2260 ''',
2261 video_webpage)
2262 if m_music:
2263 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2264 video_creator = clean_html(m_music.group('creator'))
2265 else:
2266 video_alt_title = video_creator = None
2267
2268 def extract_meta(field):
2269 return self._html_search_regex(
2270 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2271 video_webpage, field, default=None)
2272
2273 track = extract_meta('Song')
2274 artist = extract_meta('Artist')
2275 album = extract_meta('Album')
2276
2277 # Youtube Music Auto-generated description
2278 release_date = release_year = None
2279 if video_description:
2280 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2281 if mobj:
2282 if not track:
2283 track = mobj.group('track').strip()
2284 if not artist:
2285 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2286 if not album:
2287 album = mobj.group('album'.strip())
2288 release_year = mobj.group('release_year')
2289 release_date = mobj.group('release_date')
2290 if release_date:
2291 release_date = release_date.replace('-', '')
2292 if not release_year:
2293 release_year = int(release_date[:4])
2294 if release_year:
2295 release_year = int(release_year)
2296
2297 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2298 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2299 for content in contents:
2300 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2301 multiple_songs = False
2302 for row in rows:
2303 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2304 multiple_songs = True
2305 break
2306 for row in rows:
2307 mrr = row.get('metadataRowRenderer') or {}
2308 mrr_title = try_get(
2309 mrr, lambda x: x['title']['simpleText'], compat_str)
2310 mrr_contents = try_get(
2311 mrr, lambda x: x['contents'][0], dict) or {}
2312 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2313 if not (mrr_title and mrr_contents_text):
2314 continue
2315 if mrr_title == 'License':
2316 video_license = mrr_contents_text
2317 elif not multiple_songs:
2318 if mrr_title == 'Album':
2319 album = mrr_contents_text
2320 elif mrr_title == 'Artist':
2321 artist = mrr_contents_text
2322 elif mrr_title == 'Song':
2323 track = mrr_contents_text
2324
2325 m_episode = re.search(
2326 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2327 video_webpage)
2328 if m_episode:
2329 series = unescapeHTML(m_episode.group('series'))
2330 season_number = int(m_episode.group('season'))
2331 episode_number = int(m_episode.group('episode'))
2332 else:
2333 series = season_number = episode_number = None
2334
2335 m_cat_container = self._search_regex(
2336 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2337 video_webpage, 'categories', default=None)
2338 category = None
2339 if m_cat_container:
2340 category = self._html_search_regex(
2341 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2342 default=None)
2343 if not category:
2344 category = try_get(
2345 microformat, lambda x: x['category'], compat_str)
2346 video_categories = None if category is None else [category]
2347
2348 video_tags = [
2349 unescapeHTML(m.group('content'))
2350 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2351 if not video_tags:
2352 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2353
2354 def _extract_count(count_name):
2355 return str_to_int(self._search_regex(
2356 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2357 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2358 video_webpage, count_name, default=None))
2359
2360 like_count = _extract_count('like')
2361 dislike_count = _extract_count('dislike')
2362
2363 if view_count is None:
2364 view_count = str_to_int(self._search_regex(
2365 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2366 'view count', default=None))
2367
2368 average_rating = (
2369 float_or_none(video_details.get('averageRating'))
2370 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2371
2372 # subtitles
2373 video_subtitles = self.extract_subtitles(
2374 video_id, video_webpage, has_live_chat_replay)
2375 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2376
2377 video_duration = try_get(
2378 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2379 if not video_duration:
2380 video_duration = int_or_none(video_details.get('lengthSeconds'))
2381 if not video_duration:
2382 video_duration = parse_duration(self._html_search_meta(
2383 'duration', video_webpage, 'video duration'))
2384
2385 # Get Subscriber Count of channel
2386 subscriber_count = parse_count(self._search_regex(
2387 r'"text":"([\d\.]+\w?) subscribers"',
2388 video_webpage,
2389 'subscriber count',
2390 default=None
2391 ))
2392
2393 # annotations
2394 video_annotations = None
2395 if self._downloader.params.get('writeannotations', False):
2396 xsrf_token = None
2397 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2398 if ytcfg:
2399 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2400 if not xsrf_token:
2401 xsrf_token = self._search_regex(
2402 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2403 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2404 invideo_url = try_get(
2405 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2406 if xsrf_token and invideo_url:
2407 xsrf_field_name = None
2408 if ytcfg:
2409 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2410 if not xsrf_field_name:
2411 xsrf_field_name = self._search_regex(
2412 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2413 video_webpage, 'xsrf field name',
2414 group='xsrf_field_name', default='session_token')
2415 video_annotations = self._download_webpage(
2416 self._proto_relative_url(invideo_url),
2417 video_id, note='Downloading annotations',
2418 errnote='Unable to download video annotations', fatal=False,
2419 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2420
2421 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2422
2423 # Look for the DASH manifest
2424 if self._downloader.params.get('youtube_include_dash_manifest', True):
2425 dash_mpd_fatal = True
2426 for mpd_url in dash_mpds:
2427 dash_formats = {}
2428 try:
2429 def decrypt_sig(mobj):
2430 s = mobj.group(1)
2431 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2432 return '/signature/%s' % dec_s
2433
2434 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2435
2436 for df in self._extract_mpd_formats(
2437 mpd_url, video_id, fatal=dash_mpd_fatal,
2438 formats_dict=self._formats):
2439 if not df.get('filesize'):
2440 df['filesize'] = _extract_filesize(df['url'])
2441 # Do not overwrite DASH format found in some previous DASH manifest
2442 if df['format_id'] not in dash_formats:
2443 dash_formats[df['format_id']] = df
2444 # Additional DASH manifests may end up in HTTP Error 403 therefore
2445 # allow them to fail without bug report message if we already have
2446 # some DASH manifest succeeded. This is temporary workaround to reduce
2447 # burst of bug reports until we figure out the reason and whether it
2448 # can be fixed at all.
2449 dash_mpd_fatal = False
2450 except (ExtractorError, KeyError) as e:
2451 self.report_warning(
2452 'Skipping DASH manifest: %r' % e, video_id)
2453 if dash_formats:
2454 # Remove the formats we found through non-DASH, they
2455 # contain less info and it can be wrong, because we use
2456 # fixed values (for example the resolution). See
2457 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2458 # example.
2459 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2460 formats.extend(dash_formats.values())
2461
2462 # Check for malformed aspect ratio
2463 stretched_m = re.search(
2464 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2465 video_webpage)
2466 if stretched_m:
2467 w = float(stretched_m.group('w'))
2468 h = float(stretched_m.group('h'))
2469 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2470 # We will only process correct ratios.
2471 if w > 0 and h > 0:
2472 ratio = w / h
2473 for f in formats:
2474 if f.get('vcodec') != 'none':
2475 f['stretched_ratio'] = ratio
2476
2477 if not formats:
2478 if 'reason' in video_info:
2479 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2480 regions_allowed = self._html_search_meta(
2481 'regionsAllowed', video_webpage, default=None)
2482 countries = regions_allowed.split(',') if regions_allowed else None
2483 self.raise_geo_restricted(
2484 msg=video_info['reason'][0], countries=countries)
2485 reason = video_info['reason'][0]
2486 if 'Invalid parameters' in reason:
2487 unavailable_message = extract_unavailable_message()
2488 if unavailable_message:
2489 reason = unavailable_message
2490 raise ExtractorError(
2491 'YouTube said: %s' % reason,
2492 expected=True, video_id=video_id)
2493 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2494 raise ExtractorError('This video is DRM protected.', expected=True)
2495
2496 self._sort_formats(formats)
2497
2498 self.mark_watched(video_id, video_info, player_response)
2499
2500 return {
2501 'id': video_id,
2502 'uploader': video_uploader,
2503 'uploader_id': video_uploader_id,
2504 'uploader_url': video_uploader_url,
2505 'channel_id': channel_id,
2506 'channel_url': channel_url,
2507 'upload_date': upload_date,
2508 'license': video_license,
2509 'creator': video_creator or artist,
2510 'title': video_title,
2511 'alt_title': video_alt_title or track,
2512 'thumbnails': thumbnails,
2513 'description': video_description,
2514 'categories': video_categories,
2515 'tags': video_tags,
2516 'subtitles': video_subtitles,
2517 'automatic_captions': automatic_captions,
2518 'duration': video_duration,
2519 'age_limit': 18 if age_gate else 0,
2520 'annotations': video_annotations,
2521 'chapters': chapters,
2522 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2523 'view_count': view_count,
2524 'like_count': like_count,
2525 'dislike_count': dislike_count,
2526 'average_rating': average_rating,
2527 'formats': formats,
2528 'is_live': is_live,
2529 'start_time': start_time,
2530 'end_time': end_time,
2531 'series': series,
2532 'season_number': season_number,
2533 'episode_number': episode_number,
2534 'track': track,
2535 'artist': artist,
2536 'album': album,
2537 'release_date': release_date,
2538 'release_year': release_year,
2539 'subscriber_count': subscriber_count,
2540 }
2541
2542
2543 class YoutubeTabIE(YoutubeBaseInfoExtractor):
2544 IE_DESC = 'YouTube.com tab'
2545 _VALID_URL = r'''(?x)
2546 https?://
2547 (?:\w+\.)?
2548 (?:
2549 youtube(?:kids)?\.com|
2550 invidio\.us
2551 )/
2552 (?:
2553 (?:channel|c|user)/|
2554 (?P<not_channel>
2555 feed/|
2556 (?:playlist|watch)\?.*?\blist=
2557 )|
2558 (?!(?:%s)\b) # Direct URLs
2559 )
2560 (?P<id>[^/?\#&]+)
2561 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
2562 IE_NAME = 'youtube:tab'
2563
2564 _TESTS = [{
2565 # playlists, multipage
2566 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2567 'playlist_mincount': 94,
2568 'info_dict': {
2569 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2570 'title': 'Игорь Клейнер - Playlists',
2571 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2572 },
2573 }, {
2574 # playlists, multipage, different order
2575 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2576 'playlist_mincount': 94,
2577 'info_dict': {
2578 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2579 'title': 'Игорь Клейнер - Playlists',
2580 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2581 },
2582 }, {
2583 # playlists, singlepage
2584 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2585 'playlist_mincount': 4,
2586 'info_dict': {
2587 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2588 'title': 'ThirstForScience - Playlists',
2589 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2590 }
2591 }, {
2592 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2593 'only_matching': True,
2594 }, {
2595 # basic, single video playlist
2596 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2597 'info_dict': {
2598 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2599 'uploader': 'Sergey M.',
2600 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2601 'title': 'youtube-dl public playlist',
2602 },
2603 'playlist_count': 1,
2604 }, {
2605 # empty playlist
2606 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2607 'info_dict': {
2608 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2609 'uploader': 'Sergey M.',
2610 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2611 'title': 'youtube-dl empty playlist',
2612 },
2613 'playlist_count': 0,
2614 }, {
2615 # Home tab
2616 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2617 'info_dict': {
2618 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2619 'title': 'lex will - Home',
2620 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2621 },
2622 'playlist_mincount': 2,
2623 }, {
2624 # Videos tab
2625 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2626 'info_dict': {
2627 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2628 'title': 'lex will - Videos',
2629 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2630 },
2631 'playlist_mincount': 975,
2632 }, {
2633 # Videos tab, sorted by popular
2634 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2635 'info_dict': {
2636 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2637 'title': 'lex will - Videos',
2638 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2639 },
2640 'playlist_mincount': 199,
2641 }, {
2642 # Playlists tab
2643 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2644 'info_dict': {
2645 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2646 'title': 'lex will - Playlists',
2647 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2648 },
2649 'playlist_mincount': 17,
2650 }, {
2651 # Community tab
2652 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2653 'info_dict': {
2654 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2655 'title': 'lex will - Community',
2656 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2657 },
2658 'playlist_mincount': 18,
2659 }, {
2660 # Channels tab
2661 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2662 'info_dict': {
2663 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2664 'title': 'lex will - Channels',
2665 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2666 },
2667 'playlist_mincount': 138,
2668 }, {
2669 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2670 'only_matching': True,
2671 }, {
2672 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2673 'only_matching': True,
2674 }, {
2675 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2676 'only_matching': True,
2677 }, {
2678 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2679 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2680 'info_dict': {
2681 'title': '29C3: Not my department',
2682 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2683 'uploader': 'Christiaan008',
2684 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2685 },
2686 'playlist_count': 96,
2687 }, {
2688 'note': 'Large playlist',
2689 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2690 'info_dict': {
2691 'title': 'Uploads from Cauchemar',
2692 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2693 'uploader': 'Cauchemar',
2694 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2695 },
2696 'playlist_mincount': 1123,
2697 }, {
2698 # even larger playlist, 8832 videos
2699 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2700 'only_matching': True,
2701 }, {
2702 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2703 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2704 'info_dict': {
2705 'title': 'Uploads from Interstellar Movie',
2706 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2707 'uploader': 'Interstellar Movie',
2708 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2709 },
2710 'playlist_mincount': 21,
2711 }, {
2712 # https://github.com/ytdl-org/youtube-dl/issues/21844
2713 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2714 'info_dict': {
2715 'title': 'Data Analysis with Dr Mike Pound',
2716 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2717 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2718 'uploader': 'Computerphile',
2719 },
2720 'playlist_mincount': 11,
2721 }, {
2722 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2723 'only_matching': True,
2724 }, {
2725 # Playlist URL that does not actually serve a playlist
2726 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2727 'info_dict': {
2728 'id': 'FqZTN594JQw',
2729 'ext': 'webm',
2730 'title': "Smiley's People 01 detective, Adventure Series, Action",
2731 'uploader': 'STREEM',
2732 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2734 'upload_date': '20150526',
2735 'license': 'Standard YouTube License',
2736 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2737 'categories': ['People & Blogs'],
2738 'tags': list,
2739 'view_count': int,
2740 'like_count': int,
2741 'dislike_count': int,
2742 },
2743 'params': {
2744 'skip_download': True,
2745 },
2746 'skip': 'This video is not available.',
2747 'add_ie': [YoutubeIE.ie_key()],
2748 }, {
2749 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2750 'only_matching': True,
2751 }, {
2752 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2753 'only_matching': True,
2754 }, {
2755 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2756 'info_dict': {
2757 'id': '9Auq9mYxFEE',
2758 'ext': 'mp4',
2759 'title': 'Watch Sky News live',
2760 'uploader': 'Sky News',
2761 'uploader_id': 'skynews',
2762 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2763 'upload_date': '20191102',
2764 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2765 'categories': ['News & Politics'],
2766 'tags': list,
2767 'like_count': int,
2768 'dislike_count': int,
2769 },
2770 'params': {
2771 'skip_download': True,
2772 },
2773 }, {
2774 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2775 'info_dict': {
2776 'id': 'a48o2S1cPoo',
2777 'ext': 'mp4',
2778 'title': 'The Young Turks - Live Main Show',
2779 'uploader': 'The Young Turks',
2780 'uploader_id': 'TheYoungTurks',
2781 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2782 'upload_date': '20150715',
2783 'license': 'Standard YouTube License',
2784 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2785 'categories': ['News & Politics'],
2786 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2787 'like_count': int,
2788 'dislike_count': int,
2789 },
2790 'params': {
2791 'skip_download': True,
2792 },
2793 'only_matching': True,
2794 }, {
2795 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2796 'only_matching': True,
2797 }, {
2798 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2799 'only_matching': True,
2800 }, {
2801 'url': 'https://www.youtube.com/feed/trending',
2802 'only_matching': True,
2803 }, {
2804 # needs auth
2805 'url': 'https://www.youtube.com/feed/library',
2806 'only_matching': True,
2807 }, {
2808 # needs auth
2809 'url': 'https://www.youtube.com/feed/history',
2810 'only_matching': True,
2811 }, {
2812 # needs auth
2813 'url': 'https://www.youtube.com/feed/subscriptions',
2814 'only_matching': True,
2815 }, {
2816 # needs auth
2817 'url': 'https://www.youtube.com/feed/watch_later',
2818 'only_matching': True,
2819 }, {
2820 # no longer available?
2821 'url': 'https://www.youtube.com/feed/recommended',
2822 'only_matching': True,
2823 }, {
2824 # inline playlist with not always working continuations
2825 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2826 'only_matching': True,
2827 }, {
2828 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2829 'only_matching': True,
2830 }, {
2831 'url': 'https://www.youtube.com/course',
2832 'only_matching': True,
2833 }, {
2834 'url': 'https://www.youtube.com/zsecurity',
2835 'only_matching': True,
2836 }, {
2837 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2838 'only_matching': True,
2839 }, {
2840 'url': 'https://www.youtube.com/TheYoungTurks/live',
2841 'only_matching': True,
2842 }]
2843
2844 @classmethod
2845 def suitable(cls, url):
2846 return False if YoutubeIE.suitable(url) else super(
2847 YoutubeTabIE, cls).suitable(url)
2848
2849 def _extract_channel_id(self, webpage):
2850 channel_id = self._html_search_meta(
2851 'channelId', webpage, 'channel id', default=None)
2852 if channel_id:
2853 return channel_id
2854 channel_url = self._html_search_meta(
2855 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2856 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2857 'twitter:app:url:googleplay'), webpage, 'channel url')
2858 return self._search_regex(
2859 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2860 channel_url, 'channel id')
2861
2862 @staticmethod
2863 def _extract_grid_item_renderer(item):
2864 for item_kind in ('Playlist', 'Video', 'Channel'):
2865 renderer = item.get('grid%sRenderer' % item_kind)
2866 if renderer:
2867 return renderer
2868
2869 def _extract_video(self, renderer):
2870 video_id = renderer.get('videoId')
2871 title = try_get(
2872 renderer,
2873 (lambda x: x['title']['runs'][0]['text'],
2874 lambda x: x['title']['simpleText']), compat_str)
2875 description = try_get(
2876 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2877 compat_str)
2878 duration = parse_duration(try_get(
2879 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2880 view_count_text = try_get(
2881 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2882 view_count = str_to_int(self._search_regex(
2883 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2884 'view count', default=None))
2885 uploader = try_get(
2886 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2887 return {
2888 '_type': 'url_transparent',
2889 'ie_key': YoutubeIE.ie_key(),
2890 'id': video_id,
2891 'url': video_id,
2892 'title': title,
2893 'description': description,
2894 'duration': duration,
2895 'view_count': view_count,
2896 'uploader': uploader,
2897 }
2898
2899 def _grid_entries(self, grid_renderer):
2900 for item in grid_renderer['items']:
2901 if not isinstance(item, dict):
2902 continue
2903 renderer = self._extract_grid_item_renderer(item)
2904 if not isinstance(renderer, dict):
2905 continue
2906 title = try_get(
2907 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2908 # playlist
2909 playlist_id = renderer.get('playlistId')
2910 if playlist_id:
2911 yield self.url_result(
2912 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2913 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2914 video_title=title)
2915 # video
2916 video_id = renderer.get('videoId')
2917 if video_id:
2918 yield self._extract_video(renderer)
2919 # channel
2920 channel_id = renderer.get('channelId')
2921 if channel_id:
2922 title = try_get(
2923 renderer, lambda x: x['title']['simpleText'], compat_str)
2924 yield self.url_result(
2925 'https://www.youtube.com/channel/%s' % channel_id,
2926 ie=YoutubeTabIE.ie_key(), video_title=title)
2927
2928 def _shelf_entries_from_content(self, shelf_renderer):
2929 content = shelf_renderer.get('content')
2930 if not isinstance(content, dict):
2931 return
2932 renderer = content.get('gridRenderer')
2933 if renderer:
2934 # TODO: add support for nested playlists so each shelf is processed
2935 # as separate playlist
2936 # TODO: this includes only first N items
2937 for entry in self._grid_entries(renderer):
2938 yield entry
2939 renderer = content.get('horizontalListRenderer')
2940 if renderer:
2941 # TODO
2942 pass
2943
2944 def _shelf_entries(self, shelf_renderer, skip_channels=False):
2945 ep = try_get(
2946 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2947 compat_str)
2948 shelf_url = urljoin('https://www.youtube.com', ep)
2949 if shelf_url:
2950 # Skipping links to another channels, note that checking for
2951 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
2952 # will not work
2953 if skip_channels and '/channels?' in shelf_url:
2954 return
2955 title = try_get(
2956 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2957 yield self.url_result(shelf_url, video_title=title)
2958 # Shelf may not contain shelf URL, fallback to extraction from content
2959 for entry in self._shelf_entries_from_content(shelf_renderer):
2960 yield entry
2961
2962 def _playlist_entries(self, video_list_renderer):
2963 for content in video_list_renderer['contents']:
2964 if not isinstance(content, dict):
2965 continue
2966 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2967 if not isinstance(renderer, dict):
2968 continue
2969 video_id = renderer.get('videoId')
2970 if not video_id:
2971 continue
2972 yield self._extract_video(renderer)
2973
2974 r""" # Not needed in the new implementation
2975 def _itemSection_entries(self, item_sect_renderer):
2976 for content in item_sect_renderer['contents']:
2977 if not isinstance(content, dict):
2978 continue
2979 renderer = content.get('videoRenderer', {})
2980 if not isinstance(renderer, dict):
2981 continue
2982 video_id = renderer.get('videoId')
2983 if not video_id:
2984 continue
2985 yield self._extract_video(renderer)
2986 """
2987
2988 def _rich_entries(self, rich_grid_renderer):
2989 renderer = try_get(
2990 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
2991 video_id = renderer.get('videoId')
2992 if not video_id:
2993 return
2994 yield self._extract_video(renderer)
2995
2996 def _video_entry(self, video_renderer):
2997 video_id = video_renderer.get('videoId')
2998 if video_id:
2999 return self._extract_video(video_renderer)
3000
3001 def _post_thread_entries(self, post_thread_renderer):
3002 post_renderer = try_get(
3003 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3004 if not post_renderer:
3005 return
3006 # video attachment
3007 video_renderer = try_get(
3008 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
3009 video_id = None
3010 if video_renderer:
3011 entry = self._video_entry(video_renderer)
3012 if entry:
3013 yield entry
3014 # inline video links
3015 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3016 for run in runs:
3017 if not isinstance(run, dict):
3018 continue
3019 ep_url = try_get(
3020 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3021 if not ep_url:
3022 continue
3023 if not YoutubeIE.suitable(ep_url):
3024 continue
3025 ep_video_id = YoutubeIE._match_id(ep_url)
3026 if video_id == ep_video_id:
3027 continue
3028 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
3029
3030 def _post_thread_continuation_entries(self, post_thread_continuation):
3031 contents = post_thread_continuation.get('contents')
3032 if not isinstance(contents, list):
3033 return
3034 for content in contents:
3035 renderer = content.get('backstagePostThreadRenderer')
3036 if not isinstance(renderer, dict):
3037 continue
3038 for entry in self._post_thread_entries(renderer):
3039 yield entry
3040
3041 @staticmethod
3042 def _build_continuation_query(continuation, ctp=None):
3043 query = {
3044 'ctoken': continuation,
3045 'continuation': continuation,
3046 }
3047 if ctp:
3048 query['itct'] = ctp
3049 return query
3050
3051 @staticmethod
3052 def _extract_next_continuation_data(renderer):
3053 next_continuation = try_get(
3054 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
3055 if not next_continuation:
3056 return
3057 continuation = next_continuation.get('continuation')
3058 if not continuation:
3059 return
3060 ctp = next_continuation.get('clickTrackingParams')
3061 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3062
3063 @classmethod
3064 def _extract_continuation(cls, renderer):
3065 next_continuation = cls._extract_next_continuation_data(renderer)
3066 if next_continuation:
3067 return next_continuation
3068 contents = renderer.get('contents')
3069 if not isinstance(contents, list):
3070 return
3071 for content in contents:
3072 if not isinstance(content, dict):
3073 continue
3074 continuation_ep = try_get(
3075 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3076 dict)
3077 if not continuation_ep:
3078 continue
3079 continuation = try_get(
3080 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3081 if not continuation:
3082 continue
3083 ctp = continuation_ep.get('clickTrackingParams')
3084 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3085
    def _entries(self, tab, identity_token):
        """Yield all entries of a tab, following continuations across pages.

        `continuation_list` is a one-element list used as a mutable cell so
        the nested generator can pass the next continuation back out
        (Python 2 has no `nonlocal`).
        """

        def extract_entries(parent_renderer):  # called again for each feed continuation
            # Dispatch every content item to the matching renderer handler
            # and record the continuation of whichever renderer provides one.
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Rich grid layout (e.g. channel home): items come one by one
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # On the Channels tab, shelf links to other channels
                        # are the payload, so do not skip them there
                        is_channels_tab = tab.get('title') == 'Channels'
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        # Page through continuations until none is produced
        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Old-style continuation payloads
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    # Reset the cell so extract_entries can report the next continuation
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # New-style continuation payloads
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): `continuation` is left unchanged here, so the
                    # same page would be requested again — confirm this branch is
                    # unreachable in practice
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3229
3230 @staticmethod
3231 def _extract_selected_tab(tabs):
3232 for tab in tabs:
3233 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3234 return tab['tabRenderer']
3235 else:
3236 raise ExtractorError('Unable to find selected tab')
3237
3238 @staticmethod
3239 def _extract_uploader(data):
3240 uploader = {}
3241 sidebar_renderer = try_get(
3242 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3243 if sidebar_renderer:
3244 for item in sidebar_renderer:
3245 if not isinstance(item, dict):
3246 continue
3247 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3248 if not isinstance(renderer, dict):
3249 continue
3250 owner = try_get(
3251 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3252 if owner:
3253 uploader['uploader'] = owner.get('text')
3254 uploader['uploader_id'] = try_get(
3255 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3256 uploader['uploader_url'] = urljoin(
3257 'https://www.youtube.com/',
3258 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3259 return uploader
3260
3261 def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3262 selected_tab = self._extract_selected_tab(tabs)
3263 renderer = try_get(
3264 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3265 playlist_id = title = description = None
3266 if renderer:
3267 channel_title = renderer.get('title') or item_id
3268 tab_title = selected_tab.get('title')
3269 title = channel_title or item_id
3270 if tab_title:
3271 title += ' - %s' % tab_title
3272 description = renderer.get('description')
3273 playlist_id = renderer.get('externalId')
3274 renderer = try_get(
3275 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3276 if renderer:
3277 title = renderer.get('title')
3278 description = None
3279 playlist_id = item_id
3280 if playlist_id is None:
3281 playlist_id = item_id
3282 if title is None:
3283 title = "Youtube " + playlist_id.title()
3284 playlist = self.playlist_result(
3285 self._entries(selected_tab, identity_token),
3286 playlist_id=playlist_id, playlist_title=title,
3287 playlist_description=description)
3288 playlist.update(self._extract_uploader(data))
3289 return playlist
3290
3291 def _extract_from_playlist(self, item_id, url, data, playlist):
3292 title = playlist.get('title') or try_get(
3293 data, lambda x: x['titleText']['simpleText'], compat_str)
3294 playlist_id = playlist.get('playlistId') or item_id
3295 # Inline playlist rendition continuation does not always work
3296 # at Youtube side, so delegating regular tab-based playlist URL
3297 # processing whenever possible.
3298 playlist_url = urljoin(url, try_get(
3299 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3300 compat_str))
3301 if playlist_url and playlist_url != url:
3302 return self.url_result(
3303 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3304 video_title=title)
3305 return self.playlist_result(
3306 self._playlist_entries(playlist), playlist_id=playlist_id,
3307 playlist_title=title)
3308
3309 @staticmethod
3310 def _extract_alerts(data):
3311 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3312 if not isinstance(alert_dict, dict):
3313 continue
3314 for renderer in alert_dict:
3315 alert = alert_dict[renderer]
3316 alert_type = alert.get('type')
3317 if not alert_type:
3318 continue
3319 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3320 if message:
3321 yield alert_type, message
3322 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3323 message = try_get(run, lambda x: x['text'], compat_str)
3324 if message:
3325 yield alert_type, message
3326
3327 def _extract_identity_token(self, webpage, item_id):
3328 ytcfg = self._extract_ytcfg(item_id, webpage)
3329 if ytcfg:
3330 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3331 if token:
3332 return token
3333 return self._search_regex(
3334 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3335 'identity token', default=None)
3336
    def _real_extract(self, url):
        # Dispatch a tab/feed/watch URL: canonicalise the host, then try in
        # order tab extraction, inline playlist extraction, and finally a
        # plain video fallback.
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # 'post' only matches when nothing but an optional '/' precedes the
        # query/fragment, i.e. the URL is a bare channel/user home page.
        # NOTE(review): relies on a 'not_channel' group in _VALID_URL, which
        # is defined outside this view.
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/home" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # A watch URL without a video id is only usable if it names a playlist
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        # URL names both a video and a playlist: honour --no-playlist
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Surface any alert messages (e.g. removed/region-blocked content)
        for alert_type, alert_message in self._extract_alerts(data):
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3388
3389
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Matches either a bare playlist id or any youtube/invidious URL that
    # carries a list= query parameter
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE when it also matches (it handles full
        # playlist pages); this IE only catches what is left over.
        return False if YoutubeTabIE.suitable(url) else super(
            YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        # Normalise everything to a canonical playlist URL and delegate
        # the actual extraction to YoutubeTabIE
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not qs:
            # Bare playlist id (no query string): synthesise the list param
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3464
3465
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a list= parameter; rebuilt into a
    # full watch URL and forwarded to YoutubeTabIE
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Keep both the video and playlist ids so the tab extractor can
        # decide between single-video and playlist download
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3503
3504
class YoutubeYtUserIE(InfoExtractor):
    """Internal 'ytuser:NAME' scheme, redirected to the user's channel page."""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/' + user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3517
3518
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Redirects ':ytfav' (and spelling variants) to the liked-videos list."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'LL' is the list id used for the liked-videos playlist
        liked_list_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_list_url, ie=YoutubeTabIE.ie_key())
3536
3537
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional 'params' blob added to the search request (e.g. sort filters)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* url_transparent video results for *query*,
        paging through the youtubei/v1/search API via continuation tokens."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page and continuation pages store the section list
            # in different places; try both.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            isr_contents = []
            continuation_token = None
            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            for index, isr in enumerate(slr_contents):
                if not isr_contents:
                    # Guard with 'or []': try_get returns None when the
                    # renderer is absent, which would otherwise crash the
                    # validation loop below with a TypeError.
                    isr_contents = try_get(
                        slr_contents,
                        (lambda x: x[index]['itemSectionRenderer']['contents']),
                        list) or []
                    # Only accept a section that actually contains videos
                    for content in isr_contents:
                        if content.get('videoRenderer') is not None:
                            break
                    else:
                        isr_contents = []

                if continuation_token is None:
                    continuation_token = try_get(
                        slr_contents,
                        lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
                            'token'],
                        compat_str)
                # Stop scanning once both the videos and the token are found
                if continuation_token is not None and isr_contents:
                    break

            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Strip thousands separators/whitespace before parsing
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                # total increments by one per yield, so == is hit exactly
                if total == n:
                    return
            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3645
3646
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # URL-encoded search-filter blob ('CAI=') selecting newest-first ordering
    _SEARCH_PARAMS = 'CAI%3D'
3652
3653
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # SearchInfoExtractor normally derives its pattern from _SEARCH_KEY;
        # override so real /results URLs are matched instead
        return cls._VALID_URL

    def _real_extract(self, url):
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees at least one of search_query/q is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Forward the 'sp' filter blob from the URL to the search request
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3679
3680
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Common base for the authenticated feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:' + self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific, so authenticate up front
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/' + self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3701
3702
class YoutubeWatchLaterIE(InfoExtractor):
    """Redirects ':ytwatchlater' to the account's Watch Later playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is the list id used for the Watch Later playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3715
3716
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage, which shows recommendations
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3731
3732
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsub, :ytsubs, :ytsubscription, :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3744
3745
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3754
3755
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Catches watch/attribution URLs whose v= parameter is missing, which
    # typically happens when an unquoted URL is cut at '&' by the shell
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted; fail with a helpful explanation instead
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3803
3804
class YoutubeTruncatedIDIE(InfoExtractor):
    """Rejects watch URLs whose video id is shorter than the 11 characters
    of a full YouTube id (i.e. the URL was cut off somewhere)."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
3820
3821
# Do Youtube show urls even exist anymore? I couldn't find any
# NOTE: kept as dead code — the raw-string literal below is never executed;
# revive the class if show URLs resurface
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''