youtube_dlc/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     bool_or_none,
  31     clean_html,
  32     error_to_compat_str,
  33     ExtractorError,
  34     float_or_none,
  35     get_element_by_id,
  36     int_or_none,
  37     mimetype2ext,
  38     parse_codecs,
  39     parse_count,
  40     parse_duration,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_or_none,
  45     str_to_int,
  46     try_get,
  47     unescapeHTML,
  48     unified_strdate,
  49     unsmuggle_url,
  50     update_url_query,
  51     uppercase_escape,
  52     url_or_none,
  53     urlencode_postdata,
  54     urljoin,
  55 )
  56
  57
  58 class YoutubeBaseInfoExtractor(InfoExtractor):
  59     """Provide base functions for Youtube extractors"""
  60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  62
  63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  66
  67     _RESERVED_NAMES = (
  68         r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
  69         r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
  70         r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
  71
  72     _NETRC_MACHINE = 'youtube'
  73     # If True it will raise an error if no login info is provided
  74     _LOGIN_REQUIRED = False
  75
  76     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
  77
  78     def _set_language(self):
  79         self._set_cookie(
  80             '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
  81             # YouTube sets the expire time to about two months
  82             expire_time=time.time() + 2 * 30 * 24 * 3600)
  83
  84     def _ids_to_results(self, ids):
  85         return [
  86             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  87             for vid_id in ids]
  88
  89     def _login(self):
  90         """
  91         Attempt to log in to YouTube.
  92         True is returned if successful or skipped.
  93         False is returned if login failed.
  94
  95         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  96         """
  97         username, password = self._get_login_info()
  98         # No authentication to be performed
  99         if username is None:
 100             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
 101                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 102             if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 103                 self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 104             return True
 105
 106         login_page = self._download_webpage(
 107             self._LOGIN_URL, None,
 108             note='Downloading login page',
 109             errnote='unable to fetch login page', fatal=False)
 110         if login_page is False:
 111             return
 112
 113         login_form = self._hidden_inputs(login_page)
 114
 115         def req(url, f_req, note, errnote):
 116             data = login_form.copy()
 117             data.update({
 118                 'pstMsg': 1,
 119                 'checkConnection': 'youtube',
 120                 'checkedDomains': 'youtube',
 121                 'hl': 'en',
 122                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 123                 'f.req': json.dumps(f_req),
 124                 'flowName': 'GlifWebSignIn',
 125                 'flowEntry': 'ServiceLogin',
 126                 # TODO: reverse actual botguard identifier generation algo
 127                 'bgRequest': '["identifier",""]',
 128             })
 129             return self._download_json(
 130                 url, None, note=note, errnote=errnote,
 131                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 132                 fatal=False,
 133                 data=urlencode_postdata(data), headers={
 134                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 135                     'Google-Accounts-XSRF': 1,
 136                 })
 137
 138         def warn(message):
 139             self._downloader.report_warning(message)
 140
 141         lookup_req = [
 142             username,
 143             None, [], None, 'US', None, None, 2, False, True,
 144             [
 145                 None, None,
 146                 [2, 1, None, 1,
 147                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 148                  None, [], 4],
 149                 1, [None, None, []], None, None, None, True
 150             ],
 151             username,
 152         ]
 153
 154         lookup_results = req(
 155             self._LOOKUP_URL, lookup_req,
 156             'Looking up account info', 'Unable to look up account info')
 157
 158         if lookup_results is False:
 159             return False
 160
 161         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 162         if not user_hash:
 163             warn('Unable to extract user hash')
 164             return False
 165
 166         challenge_req = [
 167             user_hash,
 168             None, 1, None, [1, None, None, None, [password, None, True]],
 169             [
 170                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 171                 1, [None, None, []], None, None, None, True
 172             ]]
 173
 174         challenge_results = req(
 175             self._CHALLENGE_URL, challenge_req,
 176             'Logging in', 'Unable to log in')
 177
 178         if challenge_results is False:
 179             return
 180
 181         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 182         if login_res:
 183             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 184             warn(
 185                 'Unable to login: %s' % 'Invalid password'
 186                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 187             return False
 188
 189         res = try_get(challenge_results, lambda x: x[0][-1], list)
 190         if not res:
 191             warn('Unable to extract result entry')
 192             return False
 193
 194         login_challenge = try_get(res, lambda x: x[0][0], list)
 195         if login_challenge:
 196             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 197             if challenge_str == 'TWO_STEP_VERIFICATION':
 198                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 199                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 200                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 201                 if status == 'QUOTA_EXCEEDED':
 202                     warn('Exceeded the limit of TFA codes, try later')
 203                     return False
 204
 205                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 206                 if not tl:
 207                     warn('Unable to extract TL')
 208                     return False
 209
 210                 tfa_code = self._get_tfa_info('2-step verification code')
 211
 212                 if not tfa_code:
 213                     warn(
 214                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 215                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 216                     return False
 217
 218                 tfa_code = remove_start(tfa_code, 'G-')
 219
 220                 tfa_req = [
 221                     user_hash, None, 2, None,
 222                     [
 223                         9, None, None, None, None, None, None, None,
 224                         [None, tfa_code, True, 2]
 225                     ]]
 226
 227                 tfa_results = req(
 228                     self._TFA_URL.format(tl), tfa_req,
 229                     'Submitting TFA code', 'Unable to submit TFA code')
 230
 231                 if tfa_results is False:
 232                     return False
 233
 234                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 235                 if tfa_res:
 236                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 237                     warn(
 238                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 239                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 240                     return False
 241
 242                 check_cookie_url = try_get(
 243                     tfa_results, lambda x: x[0][-1][2], compat_str)
 244             else:
 245                 CHALLENGES = {
 246                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 247                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 248                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 249                 }
 250                 challenge = CHALLENGES.get(
 251                     challenge_str,
 252                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 253                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 254                 return False
 255         else:
 256             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 257
 258         if not check_cookie_url:
 259             warn('Unable to extract CheckCookie URL')
 260             return False
 261
 262         check_cookie_results = self._download_webpage(
 263             check_cookie_url, None, 'Checking cookie', fatal=False)
 264
 265         if check_cookie_results is False:
 266             return False
 267
 268         if 'https://myaccount.google.com/' not in check_cookie_results:
 269             warn('Unable to log in')
 270             return False
 271
 272         return True
 273
 274     def _download_webpage_handle(self, *args, **kwargs):
 275         query = kwargs.get('query', {}).copy()
 276         kwargs['query'] = query
 277         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 278             *args, **compat_kwargs(kwargs))
 279
 280     def _real_initialize(self):
 281         if self._downloader is None:
 282             return
 283         self._set_language()
 284         if not self._login():
 285             return
 286
 287     _DEFAULT_API_DATA = {
 288         'context': {
 289             'client': {
 290                 'clientName': 'WEB',
 291                 'clientVersion': '2.20201021.03.00',
 292             }
 293         },
 294     }
 295
 296     _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
 297     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
 298     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 299
 300     def _call_api(self, ep, query, video_id):
 301         data = self._DEFAULT_API_DATA.copy()
 302         data.update(query)
 303
 304         response = self._download_json(
 305             'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
 306             note='Downloading API JSON', errnote='Unable to download API page',
 307             data=json.dumps(data).encode('utf8'),
 308             headers={'content-type': 'application/json'},
 309             query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
 310
 311         return response
 312
 313     def _extract_yt_initial_data(self, video_id, webpage):
 314         return self._parse_json(
 315             self._search_regex(
 316                 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 317                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 318             video_id)
 319
 320     def _extract_ytcfg(self, video_id, webpage):
 321         return self._parse_json(
 322             self._search_regex(
 323                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 324                 default='{}'), video_id, fatal=False)
 325
 326     def _extract_video(self, renderer):
 327         video_id = renderer.get('videoId')
 328         title = try_get(
 329             renderer,
 330             (lambda x: x['title']['runs'][0]['text'],
 331              lambda x: x['title']['simpleText']), compat_str)
 332         description = try_get(
 333             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
 334             compat_str)
 335         duration = parse_duration(try_get(
 336             renderer, lambda x: x['lengthText']['simpleText'], compat_str))
 337         view_count_text = try_get(
 338             renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
 339         view_count = str_to_int(self._search_regex(
 340             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 341             'view count', default=None))
 342         uploader = try_get(
 343             renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
 344         return {
 345             '_type': 'url_transparent',
 346             'ie_key': YoutubeIE.ie_key(),
 347             'id': video_id,
 348             'url': video_id,
 349             'title': title,
 350             'description': description,
 351             'duration': duration,
 352             'view_count': view_count,
 353             'uploader': uploader,
 354         }
 355
 356
 357 class YoutubeIE(YoutubeBaseInfoExtractor):
 358     IE_DESC = 'YouTube.com'
 359     _VALID_URL = r"""(?x)^
 360                      (
 361                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 362                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
 363                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 364                             (?:www\.)?pwnyoutube\.com/|
 365                             (?:www\.)?hooktube\.com/|
 366                             (?:www\.)?yourepeat\.com/|
 367                             tube\.majestyc\.net/|
 368                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
 369                             (?:(?:www|dev)\.)?invidio\.us/|
 370                             (?:(?:www|no)\.)?invidiou\.sh/|
 371                             (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
 372                             (?:www\.)?invidious\.kabi\.tk/|
 373                             (?:www\.)?invidious\.13ad\.de/|
 374                             (?:www\.)?invidious\.mastodon\.host/|
 375                             (?:www\.)?invidious\.zapashcanon\.fr/|
 376                             (?:www\.)?invidious\.kavin\.rocks/|
 377                             (?:www\.)?invidious\.tube/|
 378                             (?:www\.)?invidiou\.site/|
 379                             (?:www\.)?invidious\.site/|
 380                             (?:www\.)?invidious\.xyz/|
 381                             (?:www\.)?invidious\.nixnet\.xyz/|
 382                             (?:www\.)?invidious\.drycat\.fr/|
 383                             (?:www\.)?tube\.poal\.co/|
 384                             (?:www\.)?tube\.connect\.cafe/|
 385                             (?:www\.)?vid\.wxzm\.sx/|
 386                             (?:www\.)?vid\.mint\.lgbt/|
 387                             (?:www\.)?yewtu\.be/|
 388                             (?:www\.)?yt\.elukerio\.org/|
 389                             (?:www\.)?yt\.lelux\.fi/|
 390                             (?:www\.)?invidious\.ggc-project\.de/|
 391                             (?:www\.)?yt\.maisputain\.ovh/|
 392                             (?:www\.)?invidious\.13ad\.de/|
 393                             (?:www\.)?invidious\.toot\.koeln/|
 394                             (?:www\.)?invidious\.fdn\.fr/|
 395                             (?:www\.)?watch\.nettohikari\.com/|
 396                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
 397                             (?:www\.)?qklhadlycap4cnod\.onion/|
 398                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
 399                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
 400                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
 401                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
 402                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
 403                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
 404                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 405                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 406                          (?:                                                  # the various things that can precede the ID:
 407                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 408                              |(?:                                             # or the v= param in all its forms
 409                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 410                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 411                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 412                                  v=
 413                              )
 414                          ))
 415                          |(?:
 416                             youtu\.be|                                        # just youtu.be/xxxx
 417                             vid\.plus|                                        # or vid.plus/xxxx
 418                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 419                          )/
 420                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 421                          )
 422                      )?                                                       # all until now is optional -> you can pass the naked ID
 423                      (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 424                      (?!.*?\blist=
 425                         (?:
 426                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 427                             WL                                                # WL are handled by the watch later IE
 428                         )
 429                      )
 430                      (?(1).+)?                                                # if we found the ID, everything can follow
 431                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 432     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 433     _PLAYER_INFO_RE = (
 434         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 435         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 436     )
 437     _formats = {
 438         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 439         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 440         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 441         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 442         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 443         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 444         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 445         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 446         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 447         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 448         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 449         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 450         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 451         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 452         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 453         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 454         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 455         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 456
 457
 458         # 3D videos
 459         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 460         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 461         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 462         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 463         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 464         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 465         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 466
 467         # Apple HTTP Live Streaming
 468         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 469         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 470         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 471         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 472         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 473         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 474         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 475         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 476
 477         # DASH mp4 video
 478         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 479         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 480         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 481         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 482         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 483         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 484         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 485         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 486         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 487         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 488         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 489         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 490
 491         # Dash mp4 audio
 492         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 493         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 494         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 495         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 496         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 497         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 498         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 499
 500         # Dash webm
 501         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 502         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 503         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 504         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 505         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 506         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 507         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 508         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 509         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 510         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 511         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 512         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 513         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 514         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 515         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 516         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 517         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 518         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 519         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 520         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 521         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 522         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 523
 524         # Dash webm audio
 525         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 526         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 527
 528         # Dash webm audio with opus inside
 529         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 530         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 531         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 532
 533         # RTMP (unnamed)
 534         '_rtmp': {'protocol': 'rtmp'},
 535
 536         # av01 video only formats sometimes served with "unknown" codecs
 537         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 538         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 539         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 540         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 541     }
 542     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 543
 544     _GEO_BYPASS = False
 545
 546     IE_NAME = 'youtube'
 547     _TESTS = [
 548         {
 549             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 550             'info_dict': {
 551                 'id': 'BaW_jenozKc',
 552                 'ext': 'mp4',
 553                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 554                 'uploader': 'Philipp Hagemeister',
 555                 'uploader_id': 'phihag',
 556                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 557                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 558                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 559                 'upload_date': '20121002',
 560                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 561                 'categories': ['Science & Technology'],
 562                 'tags': ['youtube-dl'],
 563                 'duration': 10,
 564                 'view_count': int,
 565                 'like_count': int,
 566                 'dislike_count': int,
 567                 'start_time': 1,
 568                 'end_time': 9,
 569             }
 570         },
 571         {
 572             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 573             'note': 'Embed-only video (#1746)',
 574             'info_dict': {
 575                 'id': 'yZIXLfi8CZQ',
 576                 'ext': 'mp4',
 577                 'upload_date': '20120608',
 578                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 579                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 580                 'uploader': 'SET India',
 581                 'uploader_id': 'setindia',
 582                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 583                 'age_limit': 18,
 584             }
 585         },
 586         {
 587             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
 588             'note': 'Use the first video ID in the URL',
 589             'info_dict': {
 590                 'id': 'BaW_jenozKc',
 591                 'ext': 'mp4',
 592                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 593                 'uploader': 'Philipp Hagemeister',
 594                 'uploader_id': 'phihag',
 595                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 596                 'upload_date': '20121002',
 597                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 598                 'categories': ['Science & Technology'],
 599                 'tags': ['youtube-dl'],
 600                 'duration': 10,
 601                 'view_count': int,
 602                 'like_count': int,
 603                 'dislike_count': int,
 604             },
 605             'params': {
 606                 'skip_download': True,
 607             },
 608         },
 609         {
 610             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 611             'note': '256k DASH audio (format 141) via DASH manifest',
 612             'info_dict': {
 613                 'id': 'a9LDPn-MO4I',
 614                 'ext': 'm4a',
 615                 'upload_date': '20121002',
 616                 'uploader_id': '8KVIDEO',
 617                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 618                 'description': '',
 619                 'uploader': '8KVIDEO',
 620                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 621             },
 622             'params': {
 623                 'youtube_include_dash_manifest': True,
 624                 'format': '141',
 625             },
 626             'skip': 'format 141 not served anymore',
 627         },
 628         # DASH manifest with encrypted signature
 629         {
 630             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 631             'info_dict': {
 632                 'id': 'IB3lcPjvWLA',
 633                 'ext': 'm4a',
 634                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 635                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 636                 'duration': 244,
 637                 'uploader': 'AfrojackVEVO',
 638                 'uploader_id': 'AfrojackVEVO',
 639                 'upload_date': '20131011',
 640             },
 641             'params': {
 642                 'youtube_include_dash_manifest': True,
 643                 'format': '141/bestaudio[ext=m4a]',
 644             },
 645         },
 646         # Controversy video
 647         {
 648             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 649             'info_dict': {
 650                 'id': 'T4XJQO3qol8',
 651                 'ext': 'mp4',
 652                 'duration': 219,
 653                 'upload_date': '20100909',
 654                 'uploader': 'Amazing Atheist',
 655                 'uploader_id': 'TheAmazingAtheist',
 656                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 657                 'title': 'Burning Everyone\'s Koran',
 658                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 659             }
 660         },
 661         # Normal age-gate video (embed allowed)
 662         {
 663             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 664             'info_dict': {
 665                 'id': 'HtVdAasjOgU',
 666                 'ext': 'mp4',
 667                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 668                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 669                 'duration': 142,
 670                 'uploader': 'The Witcher',
 671                 'uploader_id': 'WitcherGame',
 672                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 673                 'upload_date': '20140605',
 674                 'age_limit': 18,
 675             },
 676         },
 677         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 678         # YouTube Red ad is not captured for creator
 679         {
 680             'url': '__2ABJjxzNo',
 681             'info_dict': {
 682                 'id': '__2ABJjxzNo',
 683                 'ext': 'mp4',
 684                 'duration': 266,
 685                 'upload_date': '20100430',
 686                 'uploader_id': 'deadmau5',
 687                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 688                 'creator': 'Dada Life, deadmau5',
 689                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 690                 'uploader': 'deadmau5',
 691                 'title': 'Deadmau5 - Some Chords (HD)',
 692                 'alt_title': 'This Machine Kills Some Chords',
 693             },
 694             'expected_warnings': [
 695                 'DASH manifest missing',
 696             ]
 697         },
 698         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 699         {
 700             'url': 'lqQg6PlCWgI',
 701             'info_dict': {
 702                 'id': 'lqQg6PlCWgI',
 703                 'ext': 'mp4',
 704                 'duration': 6085,
 705                 'upload_date': '20150827',
 706                 'uploader_id': 'olympic',
 707                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 708                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 709                 'uploader': 'Olympic',
 710                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 711             },
 712             'params': {
 713                 'skip_download': 'requires avconv',
 714             }
 715         },
 716         # Non-square pixels
 717         {
 718             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 719             'info_dict': {
 720                 'id': '_b-2C3KPAM0',
 721                 'ext': 'mp4',
 722                 'stretched_ratio': 16 / 9.,
 723                 'duration': 85,
 724                 'upload_date': '20110310',
 725                 'uploader_id': 'AllenMeow',
 726                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 727                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 728                 'uploader': '孫ᄋᄅ',
 729                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 730             },
 731         },
 732         # url_encoded_fmt_stream_map is empty string
 733         {
 734             'url': 'qEJwOuvDf7I',
 735             'info_dict': {
 736                 'id': 'qEJwOuvDf7I',
 737                 'ext': 'webm',
 738                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 739                 'description': '',
 740                 'upload_date': '20150404',
 741                 'uploader_id': 'spbelect',
 742                 'uploader': 'Наблюдатели Петербурга',
 743             },
 744             'params': {
 745                 'skip_download': 'requires avconv',
 746             },
 747             'skip': 'This live event has ended.',
 748         },
 749         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 750         {
 751             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 752             'info_dict': {
 753                 'id': 'FIl7x6_3R5Y',
 754                 'ext': 'webm',
 755                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 756                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 757                 'duration': 220,
 758                 'upload_date': '20150625',
 759                 'uploader_id': 'dorappi2000',
 760                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 761                 'uploader': 'dorappi2000',
 762                 'formats': 'mincount:31',
 763             },
 764             'skip': 'not actual anymore',
 765         },
 766         # DASH manifest with segment_list
 767         {
 768             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 769             'md5': '8ce563a1d667b599d21064e982ab9e31',
 770             'info_dict': {
 771                 'id': 'CsmdDsKjzN8',
 772                 'ext': 'mp4',
 773                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 774                 'uploader': 'Airtek',
 775                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 776                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 777                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 778             },
 779             'params': {
 780                 'youtube_include_dash_manifest': True,
 781                 'format': '135',  # bestvideo
 782             },
 783             'skip': 'This live event has ended.',
 784         },
 785         {
 786             # Multifeed videos (multiple cameras), URL is for Main Camera
 787             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 788             'info_dict': {
 789                 'id': 'jqWvoWXjCVs',
 790                 'title': 'teamPGP: Rocket League Noob Stream',
 791                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 792             },
 793             'playlist': [{
 794                 'info_dict': {
 795                     'id': 'jqWvoWXjCVs',
 796                     'ext': 'mp4',
 797                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 798                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 799                     'duration': 7335,
 800                     'upload_date': '20150721',
 801                     'uploader': 'Beer Games Beer',
 802                     'uploader_id': 'beergamesbeer',
 803                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 804                     'license': 'Standard YouTube License',
 805                 },
 806             }, {
 807                 'info_dict': {
 808                     'id': '6h8e8xoXJzg',
 809                     'ext': 'mp4',
 810                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 811                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 812                     'duration': 7337,
 813                     'upload_date': '20150721',
 814                     'uploader': 'Beer Games Beer',
 815                     'uploader_id': 'beergamesbeer',
 816                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 817                     'license': 'Standard YouTube License',
 818                 },
 819             }, {
 820                 'info_dict': {
 821                     'id': 'PUOgX5z9xZw',
 822                     'ext': 'mp4',
 823                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 824                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 825                     'duration': 7337,
 826                     'upload_date': '20150721',
 827                     'uploader': 'Beer Games Beer',
 828                     'uploader_id': 'beergamesbeer',
 829                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 830                     'license': 'Standard YouTube License',
 831                 },
 832             }, {
 833                 'info_dict': {
 834                     'id': 'teuwxikvS5k',
 835                     'ext': 'mp4',
 836                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 837                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 838                     'duration': 7334,
 839                     'upload_date': '20150721',
 840                     'uploader': 'Beer Games Beer',
 841                     'uploader_id': 'beergamesbeer',
 842                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 843                     'license': 'Standard YouTube License',
 844                 },
 845             }],
 846             'params': {
 847                 'skip_download': True,
 848             },
 849             'skip': 'This video is not available.',
 850         },
 851         {
 852             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 853             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 854             'info_dict': {
 855                 'id': 'gVfLd0zydlo',
 856                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 857             },
 858             'playlist_count': 2,
 859             'skip': 'Not multifeed anymore',
 860         },
 861         {
 862             'url': 'https://vid.plus/FlRa-iH7PGw',
 863             'only_matching': True,
 864         },
 865         {
 866             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 867             'only_matching': True,
 868         },
 869         {
 870             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 871             # Also tests cut-off URL expansion in video description (see
 872             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 873             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 874             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 875             'info_dict': {
 876                 'id': 'lsguqyKfVQg',
 877                 'ext': 'mp4',
 878                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 879                 'alt_title': 'Dark Walk - Position Music',
 880                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 881                 'duration': 133,
 882                 'upload_date': '20151119',
 883                 'uploader_id': 'IronSoulElf',
 884                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 885                 'uploader': 'IronSoulElf',
 886                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 887                 'track': 'Dark Walk - Position Music',
 888                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 889                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
 890             },
 891             'params': {
 892                 'skip_download': True,
 893             },
 894         },
 895         {
 896             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 897             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 898             'only_matching': True,
 899         },
 900         {
 901             # Video with yt:stretch=17:0
 902             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 903             'info_dict': {
 904                 'id': 'Q39EVAstoRM',
 905                 'ext': 'mp4',
 906                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 907                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 908                 'upload_date': '20151107',
 909                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 910                 'uploader': 'CH GAMER DROID',
 911             },
 912             'params': {
 913                 'skip_download': True,
 914             },
 915             'skip': 'This video does not exist.',
 916         },
 917         {
 918             # Video licensed under Creative Commons
 919             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 920             'info_dict': {
 921                 'id': 'M4gD1WSo5mA',
 922                 'ext': 'mp4',
 923                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 924                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 925                 'duration': 721,
 926                 'upload_date': '20150127',
 927                 'uploader_id': 'BerkmanCenter',
 928                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 929                 'uploader': 'The Berkman Klein Center for Internet & Society',
 930                 'license': 'Creative Commons Attribution license (reuse allowed)',
 931             },
 932             'params': {
 933                 'skip_download': True,
 934             },
 935         },
 936         {
 937             # Channel-like uploader_url
 938             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 939             'info_dict': {
 940                 'id': 'eQcmzGIKrzg',
 941                 'ext': 'mp4',
 942                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 943                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 944                 'duration': 4060,
 945                 'upload_date': '20151119',
 946                 'uploader': 'Bernie Sanders',
 947                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 948                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 949                 'license': 'Creative Commons Attribution license (reuse allowed)',
 950             },
 951             'params': {
 952                 'skip_download': True,
 953             },
 954         },
 955         {
 956             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 957             'only_matching': True,
 958         },
 959         {
 960             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
 961             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 962             'only_matching': True,
 963         },
 964         {
 965             # Rental video preview
 966             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 967             'info_dict': {
 968                 'id': 'uGpuVWrhIzE',
 969                 'ext': 'mp4',
 970                 'title': 'Piku - Trailer',
 971                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 972                 'upload_date': '20150811',
 973                 'uploader': 'FlixMatrix',
 974                 'uploader_id': 'FlixMatrixKaravan',
 975                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
 976                 'license': 'Standard YouTube License',
 977             },
 978             'params': {
 979                 'skip_download': True,
 980             },
 981             'skip': 'This video is not available.',
 982         },
 983         {
 984             # YouTube Red video with episode data
 985             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
 986             'info_dict': {
 987                 'id': 'iqKdEhx-dD4',
 988                 'ext': 'mp4',
 989                 'title': 'Isolation - Mind Field (Ep 1)',
 990                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
 991                 'duration': 2085,
 992                 'upload_date': '20170118',
 993                 'uploader': 'Vsauce',
 994                 'uploader_id': 'Vsauce',
 995                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
 996                 'series': 'Mind Field',
 997                 'season_number': 1,
 998                 'episode_number': 1,
 999             },
1000             'params': {
1001                 'skip_download': True,
1002             },
1003             'expected_warnings': [
1004                 'Skipping DASH manifest',
1005             ],
1006         },
1007         {
1008             # The following content has been identified by the YouTube community
1009             # as inappropriate or offensive to some audiences.
1010             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1011             'info_dict': {
1012                 'id': '6SJNVb0GnPI',
1013                 'ext': 'mp4',
1014                 'title': 'Race Differences in Intelligence',
1015                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1016                 'duration': 965,
1017                 'upload_date': '20140124',
1018                 'uploader': 'New Century Foundation',
1019                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1020                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1021             },
1022             'params': {
1023                 'skip_download': True,
1024             },
1025         },
1026         {
1027             # itag 212
1028             'url': '1t24XAntNCY',
1029             'only_matching': True,
1030         },
1031         {
1032             # geo restricted to JP
1033             'url': 'sJL6WA-aGkQ',
1034             'only_matching': True,
1035         },
1036         {
1037             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1038             'only_matching': True,
1039         },
1040         {
1041             # DRM protected
1042             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1043             'only_matching': True,
1044         },
1045         {
1046             # Video with unsupported adaptive stream type formats
1047             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1048             'info_dict': {
1049                 'id': 'Z4Vy8R84T1U',
1050                 'ext': 'mp4',
1051                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1052                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1053                 'duration': 433,
1054                 'upload_date': '20130923',
1055                 'uploader': 'Amelia Putri Harwita',
1056                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1057                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1058                 'formats': 'maxcount:10',
1059             },
1060             'params': {
1061                 'skip_download': True,
1062                 'youtube_include_dash_manifest': False,
1063             },
1064             'skip': 'not actual anymore',
1065         },
1066         {
1067             # Youtube Music Auto-generated description
1068             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1069             'info_dict': {
1070                 'id': 'MgNrAu2pzNs',
1071                 'ext': 'mp4',
1072                 'title': 'Voyeur Girl',
1073                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1074                 'upload_date': '20190312',
1075                 'uploader': 'Stephen - Topic',
1076                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1077                 'artist': 'Stephen',
1078                 'track': 'Voyeur Girl',
1079                 'album': 'it\'s too much love to know my dear',
1080                 'release_date': '20190313',
1081                 'release_year': 2019,
1082             },
1083             'params': {
1084                 'skip_download': True,
1085             },
1086         },
1087         {
1088             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1089             'only_matching': True,
1090         },
1091         {
1092             # invalid -> valid video id redirection
1093             'url': 'DJztXj2GPfl',
1094             'info_dict': {
1095                 'id': 'DJztXj2GPfk',
1096                 'ext': 'mp4',
1097                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1098                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1099                 'upload_date': '20090125',
1100                 'uploader': 'Prochorowka',
1101                 'uploader_id': 'Prochorowka',
1102                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1103                 'artist': 'Panjabi MC',
1104                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1105                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1106             },
1107             'params': {
1108                 'skip_download': True,
1109             },
1110         },
1111         {
1112             # empty description results in an empty string
1113             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1114             'info_dict': {
1115                 'id': 'x41yOUIvK2k',
1116                 'ext': 'mp4',
1117                 'title': 'IMG 3456',
1118                 'description': '',
1119                 'upload_date': '20170613',
1120                 'uploader_id': 'ElevageOrVert',
1121                 'uploader': 'ElevageOrVert',
1122             },
1123             'params': {
1124                 'skip_download': True,
1125             },
1126         },
1127         {
1128             # with '};' inside yt initial data (see [1])
1129             # see [2] for an example with '};' inside ytInitialPlayerResponse
1130             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1131             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1132             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1133             'info_dict': {
1134                 'id': 'CHqg6qOn4no',
1135                 'ext': 'mp4',
1136                 'title': 'Part 77   Sort a list of simple types in c#',
1137                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1138                 'upload_date': '20130831',
1139                 'uploader_id': 'kudvenkat',
1140                 'uploader': 'kudvenkat',
1141             },
1142             'params': {
1143                 'skip_download': True,
1144             },
1145         },
1146         {
1147             # another example of '};' in ytInitialData
1148             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1149             'only_matching': True,
1150         },
1151         {
1152             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1153             'only_matching': True,
1154         },
1155     ]
1156
1157     def __init__(self, *args, **kwargs):
1158         super(YoutubeIE, self).__init__(*args, **kwargs)
1159         self._player_cache = {}
1160
1161     def report_video_info_webpage_download(self, video_id):
1162         """Report attempt to download video info webpage."""
1163         self.to_screen('%s: Downloading video info webpage' % video_id)
1164
1165     def report_information_extraction(self, video_id):
1166         """Report attempt to extract video information."""
1167         self.to_screen('%s: Extracting video information' % video_id)
1168
1169     def report_unavailable_format(self, video_id, format):
1170         """Report extracted video URL."""
1171         self.to_screen('%s: Format %s not available' % (video_id, format))
1172
1173     def report_rtmp_download(self):
1174         """Indicate the download will use the RTMP protocol."""
1175         self.to_screen('RTMP download detected')
1176
1177     def _signature_cache_id(self, example_sig):
1178         """ Return a string representation of a signature """
1179         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1180
1181     @classmethod
1182     def _extract_player_info(cls, player_url):
1183         for player_re in cls._PLAYER_INFO_RE:
1184             id_m = re.search(player_re, player_url)
1185             if id_m:
1186                 break
1187         else:
1188             raise ExtractorError('Cannot identify player %r' % player_url)
1189         return id_m.group('ext'), id_m.group('id')
1190
1191     def _extract_signature_function(self, video_id, player_url, example_sig):
1192         player_type, player_id = self._extract_player_info(player_url)
1193
1194         # Read from filesystem cache
1195         func_id = '%s_%s_%s' % (
1196             player_type, player_id, self._signature_cache_id(example_sig))
1197         assert os.path.basename(func_id) == func_id
1198
1199         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1200         if cache_spec is not None:
1201             return lambda s: ''.join(s[i] for i in cache_spec)
1202
1203         download_note = (
1204             'Downloading player %s' % player_url
1205             if self._downloader.params.get('verbose') else
1206             'Downloading %s player %s' % (player_type, player_id)
1207         )
1208         if player_type == 'js':
1209             code = self._download_webpage(
1210                 player_url, video_id,
1211                 note=download_note,
1212                 errnote='Download of %s failed' % player_url)
1213             res = self._parse_sig_js(code)
1214         elif player_type == 'swf':
1215             urlh = self._request_webpage(
1216                 player_url, video_id,
1217                 note=download_note,
1218                 errnote='Download of %s failed' % player_url)
1219             code = urlh.read()
1220             res = self._parse_sig_swf(code)
1221         else:
1222             assert False, 'Invalid player type %r' % player_type
1223
1224         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1225         cache_res = res(test_string)
1226         cache_spec = [ord(c) for c in cache_res]
1227
1228         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1229         return res
1230
1231     def _print_sig_code(self, func, example_sig):
1232         def gen_sig_code(idxs):
1233             def _genslice(start, end, step):
1234                 starts = '' if start == 0 else str(start)
1235                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1236                 steps = '' if step == 1 else (':%d' % step)
1237                 return 's[%s%s%s]' % (starts, ends, steps)
1238
1239             step = None
1240             # Quelch pyflakes warnings - start will be set when step is set
1241             start = '(Never used)'
1242             for i, prev in zip(idxs[1:], idxs[:-1]):
1243                 if step is not None:
1244                     if i - prev == step:
1245                         continue
1246                     yield _genslice(start, prev, step)
1247                     step = None
1248                     continue
1249                 if i - prev in [-1, 1]:
1250                     step = i - prev
1251                     start = prev
1252                     continue
1253                 else:
1254                     yield 's[%d]' % prev
1255             if step is None:
1256                 yield 's[%d]' % i
1257             else:
1258                 yield _genslice(start, i, step)
1259
1260         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1261         cache_res = func(test_string)
1262         cache_spec = [ord(c) for c in cache_res]
1263         expr_code = ' + '.join(gen_sig_code(cache_spec))
1264         signature_id_tuple = '(%s)' % (
1265             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1266         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1267                 '    return %s\n') % (signature_id_tuple, expr_code)
1268         self.to_screen('Extracted signature function:\n' + code)
1269
1270     def _parse_sig_js(self, jscode):
1271         funcname = self._search_regex(
1272             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1273              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1274              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1275              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1276              # Obsolete patterns
1277              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1278              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1279              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1280              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1281              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1282              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1283              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1284              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1285             jscode, 'Initial JS player signature function name', group='sig')
1286
1287         jsi = JSInterpreter(jscode)
1288         initial_function = jsi.extract_function(funcname)
1289         return lambda s: initial_function([s])
1290
1291     def _parse_sig_swf(self, file_contents):
1292         swfi = SWFInterpreter(file_contents)
1293         TARGET_CLASSNAME = 'SignatureDecipher'
1294         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1295         initial_function = swfi.extract_function(searched_class, 'decipher')
1296         return lambda s: initial_function([s])
1297
1298     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1299         """Turn the encrypted s field into a working signature"""
1300
1301         if player_url is None:
1302             raise ExtractorError('Cannot decrypt signature without player_url')
1303
1304         if player_url.startswith('//'):
1305             player_url = 'https:' + player_url
1306         elif not re.match(r'https?://', player_url):
1307             player_url = compat_urlparse.urljoin(
1308                 'https://www.youtube.com', player_url)
1309         try:
1310             player_id = (player_url, self._signature_cache_id(s))
1311             if player_id not in self._player_cache:
1312                 func = self._extract_signature_function(
1313                     video_id, player_url, s
1314                 )
1315                 self._player_cache[player_id] = func
1316             func = self._player_cache[player_id]
1317             if self._downloader.params.get('youtube_print_sig_code'):
1318                 self._print_sig_code(func, s)
1319             return func(s)
1320         except Exception as e:
1321             tb = traceback.format_exc()
1322             raise ExtractorError(
1323                 'Signature extraction failed: ' + tb, cause=e)
1324
1325     def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1326         try:
1327             subs_doc = self._download_xml(
1328                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1329                 video_id, note=False)
1330         except ExtractorError as err:
1331             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1332             return {}
1333
1334         sub_lang_list = {}
1335         for track in subs_doc.findall('track'):
1336             lang = track.attrib['lang_code']
1337             if lang in sub_lang_list:
1338                 continue
1339             sub_formats = []
1340             for ext in self._SUBTITLE_FORMATS:
1341                 params = compat_urllib_parse_urlencode({
1342                     'lang': lang,
1343                     'v': video_id,
1344                     'fmt': ext,
1345                     'name': track.attrib['name'].encode('utf-8'),
1346                 })
1347                 sub_formats.append({
1348                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1349                     'ext': ext,
1350                 })
1351             sub_lang_list[lang] = sub_formats
1352         if has_live_chat_replay:
1353             sub_lang_list['live_chat'] = [
1354                 {
1355                     'video_id': video_id,
1356                     'ext': 'json',
1357                     'protocol': 'youtube_live_chat_replay',
1358                 },
1359             ]
1360         if not sub_lang_list:
1361             self._downloader.report_warning('video doesn\'t have subtitles')
1362             return {}
1363         return sub_lang_list
1364
1365     def _get_ytplayer_config(self, video_id, webpage):
1366         patterns = (
1367             # User data may contain arbitrary character sequences that may affect
1368             # JSON extraction with regex, e.g. when '};' is contained the second
1369             # regex won't capture the whole JSON. Yet working around by trying more
1370             # concrete regex first keeping in mind proper quoted string handling
1371             # to be implemented in future that will replace this workaround (see
1372             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1373             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1374             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1375             r';ytplayer\.config\s*=\s*({.+?});',
1376         )
1377         config = self._search_regex(
1378             patterns, webpage, 'ytplayer.config', default=None)
1379         if config:
1380             return self._parse_json(
1381                 uppercase_escape(config), video_id, fatal=False)
1382
1383     def _get_automatic_captions(self, video_id, player_response, player_config):
1384         """We need the webpage for getting the captions url, pass it as an
1385            argument to speed up the process."""
1386         self.to_screen('%s: Looking for automatic captions' % video_id)
1387         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1388         if not (player_response or player_config):
1389             self._downloader.report_warning(err_msg)
1390             return {}
1391         try:
1392             args = player_config.get('args') if player_config else {}
1393             caption_url = args.get('ttsurl')
1394             if caption_url:
1395                 timestamp = args['timestamp']
1396                 # We get the available subtitles
1397                 list_params = compat_urllib_parse_urlencode({
1398                     'type': 'list',
1399                     'tlangs': 1,
1400                     'asrs': 1,
1401                 })
1402                 list_url = caption_url + '&' + list_params
1403                 caption_list = self._download_xml(list_url, video_id)
1404                 original_lang_node = caption_list.find('track')
1405                 if original_lang_node is None:
1406                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1407                     return {}
1408                 original_lang = original_lang_node.attrib['lang_code']
1409                 caption_kind = original_lang_node.attrib.get('kind', '')
1410
1411                 sub_lang_list = {}
1412                 for lang_node in caption_list.findall('target'):
1413                     sub_lang = lang_node.attrib['lang_code']
1414                     sub_formats = []
1415                     for ext in self._SUBTITLE_FORMATS:
1416                         params = compat_urllib_parse_urlencode({
1417                             'lang': original_lang,
1418                             'tlang': sub_lang,
1419                             'fmt': ext,
1420                             'ts': timestamp,
1421                             'kind': caption_kind,
1422                         })
1423                         sub_formats.append({
1424                             'url': caption_url + '&' + params,
1425                             'ext': ext,
1426                         })
1427                     sub_lang_list[sub_lang] = sub_formats
1428                 return sub_lang_list
1429
1430             def make_captions(sub_url, sub_langs):
1431                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1432                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1433                 captions = {}
1434                 for sub_lang in sub_langs:
1435                     sub_formats = []
1436                     for ext in self._SUBTITLE_FORMATS:
1437                         caption_qs.update({
1438                             'tlang': [sub_lang],
1439                             'fmt': [ext],
1440                         })
1441                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1442                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1443                         sub_formats.append({
1444                             'url': sub_url,
1445                             'ext': ext,
1446                         })
1447                     captions[sub_lang] = sub_formats
1448                 return captions
1449
1450             # New captions format as of 22.06.2017
1451             if player_response:
1452                 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1453                 base_url = renderer['captionTracks'][0]['baseUrl']
1454                 sub_lang_list = []
1455                 for lang in renderer['translationLanguages']:
1456                     lang_code = lang.get('languageCode')
1457                     if lang_code:
1458                         sub_lang_list.append(lang_code)
1459                 return make_captions(base_url, sub_lang_list)
1460
1461             # Some videos don't provide ttsurl but rather caption_tracks and
1462             # caption_translation_languages (e.g. 20LmZk1hakA)
1463             # Does not used anymore as of 22.06.2017
1464             caption_tracks = args['caption_tracks']
1465             caption_translation_languages = args['caption_translation_languages']
1466             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1467             sub_lang_list = []
1468             for lang in caption_translation_languages.split(','):
1469                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1470                 sub_lang = lang_qs.get('lc', [None])[0]
1471                 if sub_lang:
1472                     sub_lang_list.append(sub_lang)
1473             return make_captions(caption_url, sub_lang_list)
1474         # An extractor error can be raise by the download process if there are
1475         # no automatic captions but there are subtitles
1476         except (KeyError, IndexError, ExtractorError):
1477             self._downloader.report_warning(err_msg)
1478             return {}
1479
1480     def _mark_watched(self, video_id, video_info, player_response):
1481         playback_url = url_or_none(try_get(
1482             player_response,
1483             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1484             video_info, lambda x: x['videostats_playback_base_url'][0]))
1485         if not playback_url:
1486             return
1487         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1488         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1489
1490         # cpn generation algorithm is reverse engineered from base.js.
1491         # In fact it works even with dummy cpn.
1492         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1493         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1494
1495         qs.update({
1496             'ver': ['2'],
1497             'cpn': [cpn],
1498         })
1499         playback_url = compat_urlparse.urlunparse(
1500             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1501
1502         self._download_webpage(
1503             playback_url, video_id, 'Marking watched',
1504             'Unable to mark watched', fatal=False)
1505
1506     @staticmethod
1507     def _extract_urls(webpage):
1508         # Embedded YouTube player
1509         entries = [
1510             unescapeHTML(mobj.group('url'))
1511             for mobj in re.finditer(r'''(?x)
1512             (?:
1513                 <iframe[^>]+?src=|
1514                 data-video-url=|
1515                 <embed[^>]+?src=|
1516                 embedSWF\(?:\s*|
1517                 <object[^>]+data=|
1518                 new\s+SWFObject\(
1519             )
1520             (["\'])
1521                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1522                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1523             \1''', webpage)]
1524
1525         # lazyYT YouTube embed
1526         entries.extend(list(map(
1527             unescapeHTML,
1528             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1529
1530         # Wordpress "YouTube Video Importer" plugin
1531         matches = re.findall(r'''(?x)<div[^>]+
1532             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1533             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1534         entries.extend(m[-1] for m in matches)
1535
1536         return entries
1537
1538     @staticmethod
1539     def _extract_url(webpage):
1540         urls = YoutubeIE._extract_urls(webpage)
1541         return urls[0] if urls else None
1542
1543     @classmethod
1544     def extract_id(cls, url):
1545         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1546         if mobj is None:
1547             raise ExtractorError('Invalid URL: %s' % url)
1548         video_id = mobj.group(2)
1549         return video_id
1550
1551     def _extract_chapters_from_json(self, webpage, video_id, duration):
1552         if not webpage:
1553             return
1554         data = self._extract_yt_initial_data(video_id, webpage)
1555         if not data or not isinstance(data, dict):
1556             return
1557         chapters_list = try_get(
1558             data,
1559             lambda x: x['playerOverlays']
1560                        ['playerOverlayRenderer']
1561                        ['decoratedPlayerBarRenderer']
1562                        ['decoratedPlayerBarRenderer']
1563                        ['playerBar']
1564                        ['chapteredPlayerBarRenderer']
1565                        ['chapters'],
1566             list)
1567         if not chapters_list:
1568             return
1569
1570         def chapter_time(chapter):
1571             return float_or_none(
1572                 try_get(
1573                     chapter,
1574                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1575                     int),
1576                 scale=1000)
1577         chapters = []
1578         for next_num, chapter in enumerate(chapters_list, start=1):
1579             start_time = chapter_time(chapter)
1580             if start_time is None:
1581                 continue
1582             end_time = (chapter_time(chapters_list[next_num])
1583                         if next_num < len(chapters_list) else duration)
1584             if end_time is None:
1585                 continue
1586             title = try_get(
1587                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1588                 compat_str)
1589             chapters.append({
1590                 'start_time': start_time,
1591                 'end_time': end_time,
1592                 'title': title,
1593             })
1594         return chapters
1595
1596     @staticmethod
1597     def _extract_chapters_from_description(description, duration):
1598         if not description:
1599             return None
1600         chapter_lines = re.findall(
1601             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1602             description)
1603         if not chapter_lines:
1604             return None
1605         chapters = []
1606         for next_num, (chapter_line, time_point) in enumerate(
1607                 chapter_lines, start=1):
1608             start_time = parse_duration(time_point)
1609             if start_time is None:
1610                 continue
1611             if start_time > duration:
1612                 break
1613             end_time = (duration if next_num == len(chapter_lines)
1614                         else parse_duration(chapter_lines[next_num][1]))
1615             if end_time is None:
1616                 continue
1617             if end_time > duration:
1618                 end_time = duration
1619             if start_time > end_time:
1620                 break
1621             chapter_title = re.sub(
1622                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1623             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1624             chapters.append({
1625                 'start_time': start_time,
1626                 'end_time': end_time,
1627                 'title': chapter_title,
1628             })
1629         return chapters
1630
1631     def _extract_chapters(self, webpage, description, video_id, duration):
1632         return (self._extract_chapters_from_json(webpage, video_id, duration)
1633                 or self._extract_chapters_from_description(description, duration))
1634
1635     def _real_extract(self, url):
1636         url, smuggled_data = unsmuggle_url(url, {})
1637
1638         proto = (
1639             'http' if self._downloader.params.get('prefer_insecure', False)
1640             else 'https')
1641
1642         start_time = None
1643         end_time = None
1644         parsed_url = compat_urllib_parse_urlparse(url)
1645         for component in [parsed_url.fragment, parsed_url.query]:
1646             query = compat_parse_qs(component)
1647             if start_time is None and 't' in query:
1648                 start_time = parse_duration(query['t'][0])
1649             if start_time is None and 'start' in query:
1650                 start_time = parse_duration(query['start'][0])
1651             if end_time is None and 'end' in query:
1652                 end_time = parse_duration(query['end'][0])
1653
1654         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1655         mobj = re.search(self._NEXT_URL_RE, url)
1656         if mobj:
1657             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1658         video_id = self.extract_id(url)
1659
1660         # Get video webpage
1661         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1662         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1663
1664         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1665         video_id = qs.get('v', [None])[0] or video_id
1666
1667         # Attempt to extract SWF player URL
1668         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1669         if mobj is not None:
1670             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1671         else:
1672             player_url = None
1673
1674         dash_mpds = []
1675
1676         def add_dash_mpd(video_info):
1677             dash_mpd = video_info.get('dashmpd')
1678             if dash_mpd and dash_mpd[0] not in dash_mpds:
1679                 dash_mpds.append(dash_mpd[0])
1680
1681         def add_dash_mpd_pr(pl_response):
1682             dash_mpd = url_or_none(try_get(
1683                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1684                 compat_str))
1685             if dash_mpd and dash_mpd not in dash_mpds:
1686                 dash_mpds.append(dash_mpd)
1687
1688         is_live = None
1689         view_count = None
1690
1691         def extract_view_count(v_info):
1692             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1693
1694         def extract_player_response(player_response, video_id):
1695             pl_response = str_or_none(player_response)
1696             if not pl_response:
1697                 return
1698             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1699             if isinstance(pl_response, dict):
1700                 add_dash_mpd_pr(pl_response)
1701                 return pl_response
1702
1703         def extract_embedded_config(embed_webpage, video_id):
1704             embedded_config = self._search_regex(
1705                 r'setConfig\(({.*})\);',
1706                 embed_webpage, 'ytInitialData', default=None)
1707             if embedded_config:
1708                 return embedded_config
1709
1710         video_info = {}
1711         player_response = {}
1712         ytplayer_config = None
1713         embed_webpage = None
1714
1715         # Get video info
1716         if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1717                 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1718             cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1719             age_gate = True
1720             # We simulate the access to the video from www.youtube.com/v/{video_id}
1721             # this can be viewed without login into Youtube
1722             url = proto + '://www.youtube.com/embed/%s' % video_id
1723             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1724             ext = extract_embedded_config(embed_webpage, video_id)
1725             # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1726             playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1727             if not playable_in_embed:
1728                 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1729                 playable_in_embed = ''
1730             else:
1731                 playable_in_embed = playable_in_embed.group('playableinEmbed')
1732             # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1733             # if re.search(r'player-unavailable">', embed_webpage) is not None:
1734             if playable_in_embed == 'false':
1735                 '''
1736                 # TODO apply this patch when Support for Python 2.6(!) and above drops
1737                 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1738                         or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1739                 '''
1740                 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1741                         or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1742                     age_gate = False
1743                     # Try looking directly into the video webpage
1744                     ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1745                     if ytplayer_config:
1746                         args = ytplayer_config.get("args")
1747                         if args is not None:
1748                             if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1749                                 # Convert to the same format returned by compat_parse_qs
1750                                 video_info = dict((k, [v]) for k, v in args.items())
1751                                 add_dash_mpd(video_info)
1752                             # Rental video is not rented but preview is available (e.g.
1753                             # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1754                             # https://github.com/ytdl-org/youtube-dl/issues/10532)
1755                             if not video_info and args.get('ypc_vid'):
1756                                 return self.url_result(
1757                                     args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1758                             if args.get('livestream') == '1' or args.get('live_playback') == 1:
1759                                 is_live = True
1760                             if not player_response:
1761                                 player_response = extract_player_response(args.get('player_response'), video_id)
1762                         elif not player_response:
1763                             player_response = ytplayer_config
1764                     if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1765                         add_dash_mpd_pr(player_response)
1766                 else:
1767                     raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1768             else:
1769                 data = compat_urllib_parse_urlencode({
1770                     'video_id': video_id,
1771                     'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1772                     'sts': self._search_regex(
1773                         r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1774                 })
1775                 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1776                 try:
1777                     video_info_webpage = self._download_webpage(
1778                         video_info_url, video_id,
1779                         note='Refetching age-gated info webpage',
1780                         errnote='unable to download video info webpage')
1781                 except ExtractorError:
1782                     video_info_webpage = None
1783                 if video_info_webpage:
1784                     video_info = compat_parse_qs(video_info_webpage)
1785                     pl_response = video_info.get('player_response', [None])[0]
1786                     player_response = extract_player_response(pl_response, video_id)
1787                     add_dash_mpd(video_info)
1788                     view_count = extract_view_count(video_info)
1789         else:
1790             age_gate = False
1791             # Try looking directly into the video webpage
1792             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1793             if ytplayer_config:
1794                 args = ytplayer_config.get('args', {})
1795                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1796                     # Convert to the same format returned by compat_parse_qs
1797                     video_info = dict((k, [v]) for k, v in args.items())
1798                     add_dash_mpd(video_info)
1799                 # Rental video is not rented but preview is available (e.g.
1800                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1801                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1802                 if not video_info and args.get('ypc_vid'):
1803                     return self.url_result(
1804                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1805                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1806                     is_live = True
1807                 if not player_response:
1808                     player_response = extract_player_response(args.get('player_response'), video_id)
1809             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1810                 add_dash_mpd_pr(player_response)
1811
1812         if not video_info and not player_response:
1813             player_response = extract_player_response(
1814                 self._search_regex(
1815                     (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1816                      self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1817                     'initial player response', default='{}'),
1818                 video_id)
1819
1820         def extract_unavailable_message():
1821             messages = []
1822             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1823                 msg = self._html_search_regex(
1824                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1825                     video_webpage, 'unavailable %s' % kind, default=None)
1826                 if msg:
1827                     messages.append(msg)
1828             if messages:
1829                 return '\n'.join(messages)
1830
1831         if not video_info and not player_response:
1832             unavailable_message = extract_unavailable_message()
1833             if not unavailable_message:
1834                 unavailable_message = 'Unable to extract video data'
1835             raise ExtractorError(
1836                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1837
1838         if not isinstance(video_info, dict):
1839             video_info = {}
1840
1841         playable_in_embed = try_get(
1842             player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1843
1844         video_details = try_get(
1845             player_response, lambda x: x['videoDetails'], dict) or {}
1846
1847         microformat = try_get(
1848             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1849
1850         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1851         if not video_title:
1852             self._downloader.report_warning('Unable to extract video title')
1853             video_title = '_'
1854
1855         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1856         if video_description:
1857
1858             def replace_url(m):
1859                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1860                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1861                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1862                     qs = compat_parse_qs(parsed_redir_url.query)
1863                     q = qs.get('q')
1864                     if q and q[0]:
1865                         return q[0]
1866                 return redir_url
1867
1868             description_original = video_description = re.sub(r'''(?x)
1869                 <a\s+
1870                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1871                     (?:title|href)="([^"]+)"\s+
1872                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1873                     class="[^"]*"[^>]*>
1874                 [^<]+\.{3}\s*
1875                 </a>
1876             ''', replace_url, video_description)
1877             video_description = clean_html(video_description)
1878         else:
1879             video_description = video_details.get('shortDescription')
1880             if video_description is None:
1881                 video_description = self._html_search_meta('description', video_webpage)
1882
1883         if not smuggled_data.get('force_singlefeed', False):
1884             if not self._downloader.params.get('noplaylist'):
1885                 multifeed_metadata_list = try_get(
1886                     player_response,
1887                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1888                     compat_str) or try_get(
1889                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1890                 if multifeed_metadata_list:
1891                     entries = []
1892                     feed_ids = []
1893                     for feed in multifeed_metadata_list.split(','):
1894                         # Unquote should take place before split on comma (,) since textual
1895                         # fields may contain comma as well (see
1896                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1897                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1898
1899                         def feed_entry(name):
1900                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1901
1902                         feed_id = feed_entry('id')
1903                         if not feed_id:
1904                             continue
1905                         feed_title = feed_entry('title')
1906                         title = video_title
1907                         if feed_title:
1908                             title += ' (%s)' % feed_title
1909                         entries.append({
1910                             '_type': 'url_transparent',
1911                             'ie_key': 'Youtube',
1912                             'url': smuggle_url(
1913                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1914                                 {'force_singlefeed': True}),
1915                             'title': title,
1916                         })
1917                         feed_ids.append(feed_id)
1918                     self.to_screen(
1919                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1920                         % (', '.join(feed_ids), video_id))
1921                     return self.playlist_result(entries, video_id, video_title, video_description)
1922             else:
1923                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1924
1925         if view_count is None:
1926             view_count = extract_view_count(video_info)
1927         if view_count is None and video_details:
1928             view_count = int_or_none(video_details.get('viewCount'))
1929         if view_count is None and microformat:
1930             view_count = int_or_none(microformat.get('viewCount'))
1931
1932         if is_live is None:
1933             is_live = bool_or_none(video_details.get('isLive'))
1934
1935         has_live_chat_replay = False
1936         if not is_live:
1937             yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
1938             try:
1939                 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1940                 has_live_chat_replay = True
1941             except (KeyError, IndexError, TypeError):
1942                 pass
1943
1944         # Check for "rental" videos
1945         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1946             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1947
1948         def _extract_filesize(media_url):
1949             return int_or_none(self._search_regex(
1950                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1951
1952         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1953         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1954
1955         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1956             self.report_rtmp_download()
1957             formats = [{
1958                 'format_id': '_rtmp',
1959                 'protocol': 'rtmp',
1960                 'url': video_info['conn'][0],
1961                 'player_url': player_url,
1962             }]
1963         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1964             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1965             if 'rtmpe%3Dyes' in encoded_url_map:
1966                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1967             formats = []
1968             formats_spec = {}
1969             fmt_list = video_info.get('fmt_list', [''])[0]
1970             if fmt_list:
1971                 for fmt in fmt_list.split(','):
1972                     spec = fmt.split('/')
1973                     if len(spec) > 1:
1974                         width_height = spec[1].split('x')
1975                         if len(width_height) == 2:
1976                             formats_spec[spec[0]] = {
1977                                 'resolution': spec[1],
1978                                 'width': int_or_none(width_height[0]),
1979                                 'height': int_or_none(width_height[1]),
1980                             }
1981             for fmt in streaming_formats:
1982                 itag = str_or_none(fmt.get('itag'))
1983                 if not itag:
1984                     continue
1985                 quality = fmt.get('quality')
1986                 quality_label = fmt.get('qualityLabel') or quality
1987                 formats_spec[itag] = {
1988                     'asr': int_or_none(fmt.get('audioSampleRate')),
1989                     'filesize': int_or_none(fmt.get('contentLength')),
1990                     'format_note': quality_label,
1991                     'fps': int_or_none(fmt.get('fps')),
1992                     'height': int_or_none(fmt.get('height')),
1993                     # bitrate for itag 43 is always 2147483647
1994                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1995                     'width': int_or_none(fmt.get('width')),
1996                 }
1997
1998             for fmt in streaming_formats:
1999                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2000                     continue
2001                 url = url_or_none(fmt.get('url'))
2002
2003                 if not url:
2004                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2005                     if not cipher:
2006                         continue
2007                     url_data = compat_parse_qs(cipher)
2008                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2009                     if not url:
2010                         continue
2011                 else:
2012                     cipher = None
2013                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2014
2015                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2016                 # Unsupported FORMAT_STREAM_TYPE_OTF
2017                 if stream_type == 3:
2018                     continue
2019
2020                 format_id = fmt.get('itag') or url_data['itag'][0]
2021                 if not format_id:
2022                     continue
2023                 format_id = compat_str(format_id)
2024
2025                 if cipher:
2026                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2027                         ASSETS_RE = (
2028                             r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2029                             r'"jsUrl"\s*:\s*("[^"]+")',
2030                             r'"assets":.+?"js":\s*("[^"]+")')
2031                         jsplayer_url_json = self._search_regex(
2032                             ASSETS_RE,
2033                             embed_webpage if age_gate else video_webpage,
2034                             'JS player URL (1)', default=None)
2035                         if not jsplayer_url_json and not age_gate:
2036                             # We need the embed website after all
2037                             if embed_webpage is None:
2038                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2039                                 embed_webpage = self._download_webpage(
2040                                     embed_url, video_id, 'Downloading embed webpage')
2041                             jsplayer_url_json = self._search_regex(
2042                                 ASSETS_RE, embed_webpage, 'JS player URL')
2043
2044                         player_url = json.loads(jsplayer_url_json)
2045                         if player_url is None:
2046                             player_url_json = self._search_regex(
2047                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2048                                 video_webpage, 'age gate player URL')
2049                             player_url = json.loads(player_url_json)
2050
2051                     if 'sig' in url_data:
2052                         url += '&signature=' + url_data['sig'][0]
2053                     elif 's' in url_data:
2054                         encrypted_sig = url_data['s'][0]
2055
2056                         if self._downloader.params.get('verbose'):
2057                             if player_url is None:
2058                                 player_desc = 'unknown'
2059                             else:
2060                                 player_type, player_version = self._extract_player_info(player_url)
2061                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2062                             parts_sizes = self._signature_cache_id(encrypted_sig)
2063                             self.to_screen('{%s} signature length %s, %s' %
2064                                            (format_id, parts_sizes, player_desc))
2065
2066                         signature = self._decrypt_signature(
2067                             encrypted_sig, video_id, player_url, age_gate)
2068                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2069                         url += '&%s=%s' % (sp, signature)
2070                 if 'ratebypass' not in url:
2071                     url += '&ratebypass=yes'
2072
2073                 dct = {
2074                     'format_id': format_id,
2075                     'url': url,
2076                     'player_url': player_url,
2077                 }
2078                 if format_id in self._formats:
2079                     dct.update(self._formats[format_id])
2080                 if format_id in formats_spec:
2081                     dct.update(formats_spec[format_id])
2082
2083                 # Some itags are not included in DASH manifest thus corresponding formats will
2084                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2085                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2086                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2087                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2088
2089                 if width is None:
2090                     width = int_or_none(fmt.get('width'))
2091                 if height is None:
2092                     height = int_or_none(fmt.get('height'))
2093
2094                 filesize = int_or_none(url_data.get(
2095                     'clen', [None])[0]) or _extract_filesize(url)
2096
2097                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2098                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2099
2100                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2101                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2102                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2103
2104                 more_fields = {
2105                     'filesize': filesize,
2106                     'tbr': tbr,
2107                     'width': width,
2108                     'height': height,
2109                     'fps': fps,
2110                     'format_note': quality_label or quality,
2111                 }
2112                 for key, value in more_fields.items():
2113                     if value:
2114                         dct[key] = value
2115                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2116                 if type_:
2117                     type_split = type_.split(';')
2118                     kind_ext = type_split[0].split('/')
2119                     if len(kind_ext) == 2:
2120                         kind, _ = kind_ext
2121                         dct['ext'] = mimetype2ext(type_split[0])
2122                         if kind in ('audio', 'video'):
2123                             codecs = None
2124                             for mobj in re.finditer(
2125                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2126                                 if mobj.group('key') == 'codecs':
2127                                     codecs = mobj.group('val')
2128                                     break
2129                             if codecs:
2130                                 dct.update(parse_codecs(codecs))
2131                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2132                     dct['downloader_options'] = {
2133                         # Youtube throttles chunks >~10M
2134                         'http_chunk_size': 10485760,
2135                     }
2136                 formats.append(dct)
2137         else:
2138             manifest_url = (
2139                 url_or_none(try_get(
2140                     player_response,
2141                     lambda x: x['streamingData']['hlsManifestUrl'],
2142                     compat_str))
2143                 or url_or_none(try_get(
2144                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2145             if manifest_url:
2146                 formats = []
2147                 m3u8_formats = self._extract_m3u8_formats(
2148                     manifest_url, video_id, 'mp4', fatal=False)
2149                 for a_format in m3u8_formats:
2150                     itag = self._search_regex(
2151                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2152                     if itag:
2153                         a_format['format_id'] = itag
2154                         if itag in self._formats:
2155                             dct = self._formats[itag].copy()
2156                             dct.update(a_format)
2157                             a_format = dct
2158                     a_format['player_url'] = player_url
2159                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2160                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2161                     if self._downloader.params.get('youtube_include_hls_manifest', True):
2162                         formats.append(a_format)
2163             else:
2164                 error_message = extract_unavailable_message()
2165                 if not error_message:
2166                     reason_list = try_get(
2167                         player_response,
2168                         lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2169                         list) or []
2170                     for reason in reason_list:
2171                         if not isinstance(reason, dict):
2172                             continue
2173                         reason_text = try_get(reason, lambda x: x['text'], compat_str)
2174                         if reason_text:
2175                             if not error_message:
2176                                 error_message = ''
2177                             error_message += reason_text
2178                     if error_message:
2179                         error_message = clean_html(error_message)
2180                 if not error_message:
2181                     error_message = clean_html(try_get(
2182                         player_response, lambda x: x['playabilityStatus']['reason'],
2183                         compat_str))
2184                 if not error_message:
2185                     error_message = clean_html(
2186                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2187                 if error_message:
2188                     raise ExtractorError(error_message, expected=True)
2189                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2190
2191         # uploader
2192         video_uploader = try_get(
2193             video_info, lambda x: x['author'][0],
2194             compat_str) or str_or_none(video_details.get('author'))
2195         if video_uploader:
2196             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2197         else:
2198             self._downloader.report_warning('unable to extract uploader name')
2199
2200         # uploader_id
2201         video_uploader_id = None
2202         video_uploader_url = None
2203         mobj = re.search(
2204             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2205             video_webpage)
2206         if mobj is not None:
2207             video_uploader_id = mobj.group('uploader_id')
2208             video_uploader_url = mobj.group('uploader_url')
2209         else:
2210             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2211             if owner_profile_url:
2212                 video_uploader_id = self._search_regex(
2213                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2214                     default=None)
2215                 video_uploader_url = owner_profile_url
2216
2217         channel_id = (
2218             str_or_none(video_details.get('channelId'))
2219             or self._html_search_meta(
2220                 'channelId', video_webpage, 'channel id', default=None)
2221             or self._search_regex(
2222                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2223                 video_webpage, 'channel id', default=None, group='id'))
2224         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2225
2226         thumbnails = []
2227         thumbnails_list = try_get(
2228             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2229         for t in thumbnails_list:
2230             if not isinstance(t, dict):
2231                 continue
2232             thumbnail_url = url_or_none(t.get('url'))
2233             if not thumbnail_url:
2234                 continue
2235             thumbnails.append({
2236                 'url': thumbnail_url,
2237                 'width': int_or_none(t.get('width')),
2238                 'height': int_or_none(t.get('height')),
2239             })
2240
2241         if not thumbnails:
2242             video_thumbnail = None
2243             # We try first to get a high quality image:
2244             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2245                                 video_webpage, re.DOTALL)
2246             if m_thumb is not None:
2247                 video_thumbnail = m_thumb.group(1)
2248             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2249             if thumbnail_url:
2250                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2251             if video_thumbnail:
2252                 thumbnails.append({'url': video_thumbnail})
2253
2254         # upload date
2255         upload_date = self._html_search_meta(
2256             'datePublished', video_webpage, 'upload date', default=None)
2257         if not upload_date:
2258             upload_date = self._search_regex(
2259                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2260                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2261                 video_webpage, 'upload date', default=None)
2262         if not upload_date:
2263             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2264         upload_date = unified_strdate(upload_date)
2265
2266         video_license = self._html_search_regex(
2267             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2268             video_webpage, 'license', default=None)
2269
2270         m_music = re.search(
2271             r'''(?x)
2272                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2273                 <ul[^>]*>\s*
2274                 <li>(?P<title>.+?)
2275                 by (?P<creator>.+?)
2276                 (?:
2277                     \(.+?\)|
2278                     <a[^>]*
2279                         (?:
2280                             \bhref=["\']/red[^>]*>|             # drop possible
2281                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2282                         )
2283                     .*?
2284                 )?</li
2285             ''',
2286             video_webpage)
2287         if m_music:
2288             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2289             video_creator = clean_html(m_music.group('creator'))
2290         else:
2291             video_alt_title = video_creator = None
2292
2293         def extract_meta(field):
2294             return self._html_search_regex(
2295                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2296                 video_webpage, field, default=None)
2297
2298         track = extract_meta('Song')
2299         artist = extract_meta('Artist')
2300         album = extract_meta('Album')
2301
2302         # Youtube Music Auto-generated description
2303         release_date = release_year = None
2304         if video_description:
2305             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2306             if mobj:
2307                 if not track:
2308                     track = mobj.group('track').strip()
2309                 if not artist:
2310                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2311                 if not album:
2312                     album = mobj.group('album'.strip())
2313                 release_year = mobj.group('release_year')
2314                 release_date = mobj.group('release_date')
2315                 if release_date:
2316                     release_date = release_date.replace('-', '')
2317                     if not release_year:
2318                         release_year = int(release_date[:4])
2319                 if release_year:
2320                     release_year = int(release_year)
2321
2322         yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2323         contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2324         for content in contents:
2325             rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2326             multiple_songs = False
2327             for row in rows:
2328                 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2329                     multiple_songs = True
2330                     break
2331             for row in rows:
2332                 mrr = row.get('metadataRowRenderer') or {}
2333                 mrr_title = try_get(
2334                     mrr, lambda x: x['title']['simpleText'], compat_str)
2335                 mrr_contents = try_get(
2336                     mrr, lambda x: x['contents'][0], dict) or {}
2337                 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2338                 if not (mrr_title and mrr_contents_text):
2339                     continue
2340                 if mrr_title == 'License':
2341                     video_license = mrr_contents_text
2342                 elif not multiple_songs:
2343                     if mrr_title == 'Album':
2344                         album = mrr_contents_text
2345                     elif mrr_title == 'Artist':
2346                         artist = mrr_contents_text
2347                     elif mrr_title == 'Song':
2348                         track = mrr_contents_text
2349
2350         m_episode = re.search(
2351             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2352             video_webpage)
2353         if m_episode:
2354             series = unescapeHTML(m_episode.group('series'))
2355             season_number = int(m_episode.group('season'))
2356             episode_number = int(m_episode.group('episode'))
2357         else:
2358             series = season_number = episode_number = None
2359
2360         m_cat_container = self._search_regex(
2361             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2362             video_webpage, 'categories', default=None)
2363         category = None
2364         if m_cat_container:
2365             category = self._html_search_regex(
2366                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2367                 default=None)
2368         if not category:
2369             category = try_get(
2370                 microformat, lambda x: x['category'], compat_str)
2371         video_categories = None if category is None else [category]
2372
2373         video_tags = [
2374             unescapeHTML(m.group('content'))
2375             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2376         if not video_tags:
2377             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2378
2379         def _extract_count(count_name):
2380             return str_to_int(self._search_regex(
2381                 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2382                  r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2383                 video_webpage, count_name, default=None))
2384
2385         like_count = _extract_count('like')
2386         dislike_count = _extract_count('dislike')
2387
2388         if view_count is None:
2389             view_count = str_to_int(self._search_regex(
2390                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2391                 'view count', default=None))
2392
2393         average_rating = (
2394             float_or_none(video_details.get('averageRating'))
2395             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2396
2397         # subtitles
2398         video_subtitles = self.extract_subtitles(
2399             video_id, video_webpage, has_live_chat_replay)
2400         automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2401
2402         video_duration = try_get(
2403             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2404         if not video_duration:
2405             video_duration = int_or_none(video_details.get('lengthSeconds'))
2406         if not video_duration:
2407             video_duration = parse_duration(self._html_search_meta(
2408                 'duration', video_webpage, 'video duration'))
2409
2410         # Get Subscriber Count of channel
2411         subscriber_count = parse_count(self._search_regex(
2412             r'"text":"([\d\.]+\w?) subscribers"',
2413             video_webpage,
2414             'subscriber count',
2415             default=None
2416         ))
2417
2418         # get xsrf for annotations or comments
2419         get_annotations = self._downloader.params.get('writeannotations', False)
2420         get_comments = self._downloader.params.get('getcomments', False)
2421         if get_annotations or get_comments:
2422             xsrf_token = None
2423             ytcfg = self._extract_ytcfg(video_id, video_webpage)
2424             if ytcfg:
2425                 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2426             if not xsrf_token:
2427                 xsrf_token = self._search_regex(
2428                     r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2429                     video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2430
2431         # annotations
2432         video_annotations = None
2433         if get_annotations:
2434             invideo_url = try_get(
2435                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2436             if xsrf_token and invideo_url:
2437                 xsrf_field_name = None
2438                 if ytcfg:
2439                     xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2440                 if not xsrf_field_name:
2441                     xsrf_field_name = self._search_regex(
2442                         r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2443                         video_webpage, 'xsrf field name',
2444                         group='xsrf_field_name', default='session_token')
2445                 video_annotations = self._download_webpage(
2446                     self._proto_relative_url(invideo_url),
2447                     video_id, note='Downloading annotations',
2448                     errnote='Unable to download video annotations', fatal=False,
2449                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2450
2451         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2452
2453         # Get comments
        # TODO: Refactor and move to a separate function
2455         if get_comments:
2456             expected_video_comment_count = 0
2457             video_comments = []
2458
2459             def find_value(html, key, num_chars=2, separator='"'):
2460                 pos_begin = html.find(key) + len(key) + num_chars
2461                 pos_end = html.find(separator, pos_begin)
2462                 return html[pos_begin: pos_end]
2463
2464             def search_dict(partial, key):
2465                 if isinstance(partial, dict):
2466                     for k, v in partial.items():
2467                         if k == key:
2468                             yield v
2469                         else:
2470                             for o in search_dict(v, key):
2471                                 yield o
2472                 elif isinstance(partial, list):
2473                     for i in partial:
2474                         for o in search_dict(i, key):
2475                             yield o
2476
2477             try:
2478                 ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
2479                 continuations = [ncd['continuation']]
2480             # Handle videos where comments have been disabled entirely
2481             except StopIteration:
2482                 continuations = []
2483
2484             def get_continuation(continuation, session_token, replies=False):
2485                 query = {
2486                     'pbj': 1,
2487                     'ctoken': continuation,
2488                 }
2489                 if replies:
2490                     query['action_get_comment_replies'] = 1
2491                 else:
2492                     query['action_get_comments'] = 1
2493
2494                 while True:
2495                     content, handle = self._download_webpage_handle(
2496                         'https://www.youtube.com/comment_service_ajax',
2497                         video_id,
2498                         note=False,
2499                         expected_status=[413],
2500                         data=urlencode_postdata({
2501                             'session_token': session_token
2502                         }),
2503                         query=query,
2504                         headers={
2505                             'Accept': '*/*',
2506                             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
2507                             'X-YouTube-Client-Name': '1',
2508                             'X-YouTube-Client-Version': '2.20201202.06.01'
2509                         }
2510                     )
2511
2512                     response_code = handle.getcode()
2513                     if (response_code == 200):
2514                         return self._parse_json(content, video_id)
2515                     if (response_code == 413):
2516                         return None
2517                     raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2518
2519             first_continuation = True
2520             while continuations:
2521                 continuation, itct = continuations.pop()
2522                 comment_response = get_continuation(continuation, xsrf_token)
2523                 if not comment_response:
2524                     continue
2525                 if list(search_dict(comment_response, 'externalErrorMessage')):
2526                     raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2527
2528                 if 'continuationContents' not in comment_response['response']:
2529                     # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2530                     continue
2531                 # not sure if this actually helps
2532                 if 'xsrf_token' in comment_response:
2533                     xsrf_token = comment_response['xsrf_token']
2534
2535                 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2536                 if first_continuation:
2537                     expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2538                     first_continuation = False
2539                 if 'contents' not in item_section:
2540                     # continuation returned no comments?
2541                     # set an empty array as to not break the for loop
2542                     item_section['contents'] = []
2543
2544                 for meta_comment in item_section['contents']:
2545                     comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2546                     video_comments.append({
2547                         'id': comment['commentId'],
2548                         'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
2549                         'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
2550                         'author': comment.get('authorText', {}).get('simpleText', ''),
2551                         'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2552                         'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2553                         'parent': 'root'
2554                     })
2555                     if 'replies' not in meta_comment['commentThreadRenderer']:
2556                         continue
2557
2558                     reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2559                     while reply_continuations:
2560                         time.sleep(1)
2561                         continuation = reply_continuations.pop()
2562                         replies_data = get_continuation(continuation, xsrf_token, True)
2563                         if not replies_data or 'continuationContents' not in replies_data[1]['response']:
2564                             continue
2565
2566                         if self._downloader.params.get('verbose', False):
2567                             self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
2568                         reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
2569                         for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']:
2570                             reply_comment = reply_meta['commentRenderer']
2571                             video_comments.append({
2572                                 'id': reply_comment['commentId'],
2573                                 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
2574                                 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
2575                                 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2576                                 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2577                                 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2578                                 'parent': comment['commentId']
2579                             })
2580                         if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
2581                             continue
2582
2583                         reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
2584
2585                 self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2586
2587                 if 'continuations' in item_section:
2588                     continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
2589                 time.sleep(1)
2590
2591             self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2592         else:
2593             expected_video_comment_count = None
2594             video_comments = None
2595
2596         # Look for the DASH manifest
2597         if self._downloader.params.get('youtube_include_dash_manifest', True):
2598             dash_mpd_fatal = True
2599             for mpd_url in dash_mpds:
2600                 dash_formats = {}
2601                 try:
2602                     def decrypt_sig(mobj):
2603                         s = mobj.group(1)
2604                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2605                         return '/signature/%s' % dec_s
2606
2607                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2608
2609                     for df in self._extract_mpd_formats(
2610                             mpd_url, video_id, fatal=dash_mpd_fatal,
2611                             formats_dict=self._formats):
2612                         if not df.get('filesize'):
2613                             df['filesize'] = _extract_filesize(df['url'])
2614                         # Do not overwrite DASH format found in some previous DASH manifest
2615                         if df['format_id'] not in dash_formats:
2616                             dash_formats[df['format_id']] = df
2617                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2618                         # allow them to fail without bug report message if we already have
2619                         # some DASH manifest succeeded. This is temporary workaround to reduce
2620                         # burst of bug reports until we figure out the reason and whether it
2621                         # can be fixed at all.
2622                         dash_mpd_fatal = False
2623                 except (ExtractorError, KeyError) as e:
2624                     self.report_warning(
2625                         'Skipping DASH manifest: %r' % e, video_id)
2626                 if dash_formats:
2627                     # Remove the formats we found through non-DASH, they
2628                     # contain less info and it can be wrong, because we use
2629                     # fixed values (for example the resolution). See
2630                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2631                     # example.
2632                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2633                     formats.extend(dash_formats.values())
2634
2635         # Check for malformed aspect ratio
2636         stretched_m = re.search(
2637             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2638             video_webpage)
2639         if stretched_m:
2640             w = float(stretched_m.group('w'))
2641             h = float(stretched_m.group('h'))
2642             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2643             # We will only process correct ratios.
2644             if w > 0 and h > 0:
2645                 ratio = w / h
2646                 for f in formats:
2647                     if f.get('vcodec') != 'none':
2648                         f['stretched_ratio'] = ratio
2649
2650         if not formats:
2651             if 'reason' in video_info:
2652                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2653                     regions_allowed = self._html_search_meta(
2654                         'regionsAllowed', video_webpage, default=None)
2655                     countries = regions_allowed.split(',') if regions_allowed else None
2656                     self.raise_geo_restricted(
2657                         msg=video_info['reason'][0], countries=countries)
2658                 reason = video_info['reason'][0]
2659                 if 'Invalid parameters' in reason:
2660                     unavailable_message = extract_unavailable_message()
2661                     if unavailable_message:
2662                         reason = unavailable_message
2663                 raise ExtractorError(
2664                     'YouTube said: %s' % reason,
2665                     expected=True, video_id=video_id)
2666             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2667                 raise ExtractorError('This video is DRM protected.', expected=True)
2668
2669         self._sort_formats(formats)
2670
2671         self.mark_watched(video_id, video_info, player_response)
2672
2673         return {
2674             'id': video_id,
2675             'uploader': video_uploader,
2676             'uploader_id': video_uploader_id,
2677             'uploader_url': video_uploader_url,
2678             'channel_id': channel_id,
2679             'channel_url': channel_url,
2680             'upload_date': upload_date,
2681             'license': video_license,
2682             'creator': video_creator or artist,
2683             'title': video_title,
2684             'alt_title': video_alt_title or track,
2685             'thumbnails': thumbnails,
2686             'description': video_description,
2687             'categories': video_categories,
2688             'tags': video_tags,
2689             'subtitles': video_subtitles,
2690             'automatic_captions': automatic_captions,
2691             'duration': video_duration,
2692             'age_limit': 18 if age_gate else 0,
2693             'annotations': video_annotations,
2694             'chapters': chapters,
2695             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2696             'view_count': view_count,
2697             'like_count': like_count,
2698             'dislike_count': dislike_count,
2699             'average_rating': average_rating,
2700             'formats': formats,
2701             'is_live': is_live,
2702             'start_time': start_time,
2703             'end_time': end_time,
2704             'series': series,
2705             'season_number': season_number,
2706             'episode_number': episode_number,
2707             'track': track,
2708             'artist': artist,
2709             'album': album,
2710             'release_date': release_date,
2711             'release_year': release_year,
2712             'subscriber_count': subscriber_count,
2713             'playable_in_embed': playable_in_embed,
2714             'comments': video_comments,
2715             'comment_count': expected_video_comment_count,
2716         }
2717
2718
2719 class YoutubeTabIE(YoutubeBaseInfoExtractor):
    # Short description string for this extractor.
    IE_DESC = 'YouTube.com tab'
    # Verbose-mode (?x) pattern for the URLs this extractor accepts:
    #   * scheme: http or https
    #   * host: youtube.com, youtubekids.com or invidio.us, optionally
    #     preceded by one "<word>." label (e.g. www. or music.)
    #   * path, one of:
    #       - a channel/, c/ or user/ prefix
    #       - feed/ or a playlist?/watch? URL carrying a list= parameter
    #         (these two alternatives are captured as the `not_channel` group)
    #       - no prefix at all ("Direct URLs"), provided the first path segment
    #         is not one of YoutubeBaseInfoExtractor._RESERVED_NAMES (enforced
    #         by the negative lookahead the names are interpolated into)
    #   * `id`: everything up to the next '/', '?', '#' or '&'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?:channel|c|user)/|
                            (?P<not_channel>
                                feed/|
                                (?:playlist|watch)\?.*?\blist=
                            )|
                            (?!(?:%s)\b)  # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
    # Name string for this extractor.
    IE_NAME = 'youtube:tab'
2739
2740     _TESTS = [{
2741         # playlists, multipage
2742         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2743         'playlist_mincount': 94,
2744         'info_dict': {
2745             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2746             'title': 'Игорь Клейнер - Playlists',
2747             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2748         },
2749     }, {
2750         # playlists, multipage, different order
2751         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2752         'playlist_mincount': 94,
2753         'info_dict': {
2754             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2755             'title': 'Игорь Клейнер - Playlists',
2756             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2757         },
2758     }, {
2759         # playlists, singlepage
2760         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2761         'playlist_mincount': 4,
2762         'info_dict': {
2763             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2764             'title': 'ThirstForScience - Playlists',
2765             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2766         }
2767     }, {
2768         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2769         'only_matching': True,
2770     }, {
2771         # basic, single video playlist
2772         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2773         'info_dict': {
2774             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2775             'uploader': 'Sergey M.',
2776             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2777             'title': 'youtube-dl public playlist',
2778         },
2779         'playlist_count': 1,
2780     }, {
2781         # empty playlist
2782         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2783         'info_dict': {
2784             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2785             'uploader': 'Sergey M.',
2786             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2787             'title': 'youtube-dl empty playlist',
2788         },
2789         'playlist_count': 0,
2790     }, {
2791         # Home tab
2792         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2793         'info_dict': {
2794             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2795             'title': 'lex will - Home',
2796             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2797         },
2798         'playlist_mincount': 2,
2799     }, {
2800         # Videos tab
2801         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2802         'info_dict': {
2803             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2804             'title': 'lex will - Videos',
2805             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2806         },
2807         'playlist_mincount': 975,
2808     }, {
2809         # Videos tab, sorted by popular
2810         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2811         'info_dict': {
2812             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2813             'title': 'lex will - Videos',
2814             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2815         },
2816         'playlist_mincount': 199,
2817     }, {
2818         # Playlists tab
2819         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2820         'info_dict': {
2821             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2822             'title': 'lex will - Playlists',
2823             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2824         },
2825         'playlist_mincount': 17,
2826     }, {
2827         # Community tab
2828         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2829         'info_dict': {
2830             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2831             'title': 'lex will - Community',
2832             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2833         },
2834         'playlist_mincount': 18,
2835     }, {
2836         # Channels tab
2837         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2838         'info_dict': {
2839             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2840             'title': 'lex will - Channels',
2841             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2842         },
2843         'playlist_mincount': 138,
2844     }, {
2845         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2846         'only_matching': True,
2847     }, {
2848         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2849         'only_matching': True,
2850     }, {
2851         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2852         'only_matching': True,
2853     }, {
2854         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2855         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2856         'info_dict': {
2857             'title': '29C3: Not my department',
2858             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2859             'uploader': 'Christiaan008',
2860             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2861         },
2862         'playlist_count': 96,
2863     }, {
2864         'note': 'Large playlist',
2865         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2866         'info_dict': {
2867             'title': 'Uploads from Cauchemar',
2868             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2869             'uploader': 'Cauchemar',
2870             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2871         },
2872         'playlist_mincount': 1123,
2873     }, {
2874         # even larger playlist, 8832 videos
2875         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2876         'only_matching': True,
2877     }, {
2878         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2879         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2880         'info_dict': {
2881             'title': 'Uploads from Interstellar Movie',
2882             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2883             'uploader': 'Interstellar Movie',
2884             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2885         },
2886         'playlist_mincount': 21,
2887     }, {
2888         # https://github.com/ytdl-org/youtube-dl/issues/21844
2889         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2890         'info_dict': {
2891             'title': 'Data Analysis with Dr Mike Pound',
2892             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2893             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2894             'uploader': 'Computerphile',
2895         },
2896         'playlist_mincount': 11,
2897     }, {
2898         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2899         'only_matching': True,
2900     }, {
2901         # Playlist URL that does not actually serve a playlist
2902         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2903         'info_dict': {
2904             'id': 'FqZTN594JQw',
2905             'ext': 'webm',
2906             'title': "Smiley's People 01 detective, Adventure Series, Action",
2907             'uploader': 'STREEM',
2908             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2909             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2910             'upload_date': '20150526',
2911             'license': 'Standard YouTube License',
2912             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2913             'categories': ['People & Blogs'],
2914             'tags': list,
2915             'view_count': int,
2916             'like_count': int,
2917             'dislike_count': int,
2918         },
2919         'params': {
2920             'skip_download': True,
2921         },
2922         'skip': 'This video is not available.',
2923         'add_ie': [YoutubeIE.ie_key()],
2924     }, {
2925         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2926         'only_matching': True,
2927     }, {
2928         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2929         'only_matching': True,
2930     }, {
2931         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2932         'info_dict': {
2933             'id': '9Auq9mYxFEE',
2934             'ext': 'mp4',
2935             'title': 'Watch Sky News live',
2936             'uploader': 'Sky News',
2937             'uploader_id': 'skynews',
2938             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2939             'upload_date': '20191102',
2940             'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2941             'categories': ['News & Politics'],
2942             'tags': list,
2943             'like_count': int,
2944             'dislike_count': int,
2945         },
2946         'params': {
2947             'skip_download': True,
2948         },
2949     }, {
2950         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2951         'info_dict': {
2952             'id': 'a48o2S1cPoo',
2953             'ext': 'mp4',
2954             'title': 'The Young Turks - Live Main Show',
2955             'uploader': 'The Young Turks',
2956             'uploader_id': 'TheYoungTurks',
2957             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2958             'upload_date': '20150715',
2959             'license': 'Standard YouTube License',
2960             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2961             'categories': ['News & Politics'],
2962             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2963             'like_count': int,
2964             'dislike_count': int,
2965         },
2966         'params': {
2967             'skip_download': True,
2968         },
2969         'only_matching': True,
2970     }, {
2971         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2972         'only_matching': True,
2973     }, {
2974         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2975         'only_matching': True,
2976     }, {
2977         'url': 'https://www.youtube.com/feed/trending',
2978         'only_matching': True,
2979     }, {
2980         # needs auth
2981         'url': 'https://www.youtube.com/feed/library',
2982         'only_matching': True,
2983     }, {
2984         # needs auth
2985         'url': 'https://www.youtube.com/feed/history',
2986         'only_matching': True,
2987     }, {
2988         # needs auth
2989         'url': 'https://www.youtube.com/feed/subscriptions',
2990         'only_matching': True,
2991     }, {
2992         # needs auth
2993         'url': 'https://www.youtube.com/feed/watch_later',
2994         'only_matching': True,
2995     }, {
2996         # no longer available?
2997         'url': 'https://www.youtube.com/feed/recommended',
2998         'only_matching': True,
2999     }, {
3000         # inline playlist with not always working continuations
3001         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3002         'only_matching': True,
3003     }, {
3004         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3005         'only_matching': True,
3006     }, {
3007         'url': 'https://www.youtube.com/course',
3008         'only_matching': True,
3009     }, {
3010         'url': 'https://www.youtube.com/zsecurity',
3011         'only_matching': True,
3012     }, {
3013         'url': 'http://www.youtube.com/NASAgovVideo/videos',
3014         'only_matching': True,
3015     }, {
3016         'url': 'https://www.youtube.com/TheYoungTurks/live',
3017         'only_matching': True,
3018     }]
3019
3020     @classmethod
3021     def suitable(cls, url):
3022         return False if YoutubeIE.suitable(url) else super(
3023             YoutubeTabIE, cls).suitable(url)
3024
3025     def _extract_channel_id(self, webpage):
3026         channel_id = self._html_search_meta(
3027             'channelId', webpage, 'channel id', default=None)
3028         if channel_id:
3029             return channel_id
3030         channel_url = self._html_search_meta(
3031             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3032              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3033              'twitter:app:url:googleplay'), webpage, 'channel url')
3034         return self._search_regex(
3035             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3036             channel_url, 'channel id')
3037
3038     @staticmethod
3039     def _extract_grid_item_renderer(item):
3040         for item_kind in ('Playlist', 'Video', 'Channel'):
3041             renderer = item.get('grid%sRenderer' % item_kind)
3042             if renderer:
3043                 return renderer
3044
3045     def _grid_entries(self, grid_renderer):
3046         for item in grid_renderer['items']:
3047             if not isinstance(item, dict):
3048                 continue
3049             renderer = self._extract_grid_item_renderer(item)
3050             if not isinstance(renderer, dict):
3051                 continue
3052             title = try_get(
3053                 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3054             # playlist
3055             playlist_id = renderer.get('playlistId')
3056             if playlist_id:
3057                 yield self.url_result(
3058                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3059                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3060                     video_title=title)
3061             # video
3062             video_id = renderer.get('videoId')
3063             if video_id:
3064                 yield self._extract_video(renderer)
3065             # channel
3066             channel_id = renderer.get('channelId')
3067             if channel_id:
3068                 title = try_get(
3069                     renderer, lambda x: x['title']['simpleText'], compat_str)
3070                 yield self.url_result(
3071                     'https://www.youtube.com/channel/%s' % channel_id,
3072                     ie=YoutubeTabIE.ie_key(), video_title=title)
3073
3074     def _shelf_entries_from_content(self, shelf_renderer):
3075         content = shelf_renderer.get('content')
3076         if not isinstance(content, dict):
3077             return
3078         renderer = content.get('gridRenderer')
3079         if renderer:
3080             # TODO: add support for nested playlists so each shelf is processed
3081             # as separate playlist
3082             # TODO: this includes only first N items
3083             for entry in self._grid_entries(renderer):
3084                 yield entry
3085         renderer = content.get('horizontalListRenderer')
3086         if renderer:
3087             # TODO
3088             pass
3089
3090     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3091         ep = try_get(
3092             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3093             compat_str)
3094         shelf_url = urljoin('https://www.youtube.com', ep)
3095         if shelf_url:
3096             # Skipping links to another channels, note that checking for
3097             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3098             # will not work
3099             if skip_channels and '/channels?' in shelf_url:
3100                 return
3101             title = try_get(
3102                 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3103             yield self.url_result(shelf_url, video_title=title)
3104         # Shelf may not contain shelf URL, fallback to extraction from content
3105         for entry in self._shelf_entries_from_content(shelf_renderer):
3106             yield entry
3107
3108     def _playlist_entries(self, video_list_renderer):
3109         for content in video_list_renderer['contents']:
3110             if not isinstance(content, dict):
3111                 continue
3112             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3113             if not isinstance(renderer, dict):
3114                 continue
3115             video_id = renderer.get('videoId')
3116             if not video_id:
3117                 continue
3118             yield self._extract_video(renderer)
3119
3120     r""" # Not needed in the new implementation
3121     def _itemSection_entries(self, item_sect_renderer):
3122         for content in item_sect_renderer['contents']:
3123             if not isinstance(content, dict):
3124                 continue
3125             renderer = content.get('videoRenderer', {})
3126             if not isinstance(renderer, dict):
3127                 continue
3128             video_id = renderer.get('videoId')
3129             if not video_id:
3130                 continue
3131             yield self._extract_video(renderer)
3132     """
3133
3134     def _rich_entries(self, rich_grid_renderer):
3135         renderer = try_get(
3136             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3137         video_id = renderer.get('videoId')
3138         if not video_id:
3139             return
3140         yield self._extract_video(renderer)
3141
3142     def _video_entry(self, video_renderer):
3143         video_id = video_renderer.get('videoId')
3144         if video_id:
3145             return self._extract_video(video_renderer)
3146
3147     def _post_thread_entries(self, post_thread_renderer):
3148         post_renderer = try_get(
3149             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3150         if not post_renderer:
3151             return
3152         # video attachment
3153         video_renderer = try_get(
3154             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
3155         video_id = None
3156         if video_renderer:
3157             entry = self._video_entry(video_renderer)
3158             if entry:
3159                 yield entry
3160         # inline video links
3161         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3162         for run in runs:
3163             if not isinstance(run, dict):
3164                 continue
3165             ep_url = try_get(
3166                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3167             if not ep_url:
3168                 continue
3169             if not YoutubeIE.suitable(ep_url):
3170                 continue
3171             ep_video_id = YoutubeIE._match_id(ep_url)
3172             if video_id == ep_video_id:
3173                 continue
3174             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
3175
3176     def _post_thread_continuation_entries(self, post_thread_continuation):
3177         contents = post_thread_continuation.get('contents')
3178         if not isinstance(contents, list):
3179             return
3180         for content in contents:
3181             renderer = content.get('backstagePostThreadRenderer')
3182             if not isinstance(renderer, dict):
3183                 continue
3184             for entry in self._post_thread_entries(renderer):
3185                 yield entry
3186
3187     @staticmethod
3188     def _build_continuation_query(continuation, ctp=None):
3189         query = {
3190             'ctoken': continuation,
3191             'continuation': continuation,
3192         }
3193         if ctp:
3194             query['itct'] = ctp
3195         return query
3196
3197     @staticmethod
3198     def _extract_next_continuation_data(renderer):
3199         next_continuation = try_get(
3200             renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
3201         if not next_continuation:
3202             return
3203         continuation = next_continuation.get('continuation')
3204         if not continuation:
3205             return
3206         ctp = next_continuation.get('clickTrackingParams')
3207         return YoutubeTabIE._build_continuation_query(continuation, ctp)
3208
3209     @classmethod
3210     def _extract_continuation(cls, renderer):
3211         next_continuation = cls._extract_next_continuation_data(renderer)
3212         if next_continuation:
3213             return next_continuation
3214         contents = renderer.get('contents')
3215         if not isinstance(contents, list):
3216             return
3217         for content in contents:
3218             if not isinstance(content, dict):
3219                 continue
3220             continuation_ep = try_get(
3221                 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3222                 dict)
3223             if not continuation_ep:
3224                 continue
3225             continuation = try_get(
3226                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3227             if not continuation:
3228                 continue
3229             ctp = continuation_ep.get('clickTrackingParams')
3230             return YoutubeTabIE._build_continuation_query(continuation, ctp)
3231
3232     def _entries(self, tab, identity_token):
3233
3234         def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
3235             contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3236             for content in contents:
3237                 if not isinstance(content, dict):
3238                     continue
3239                 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3240                 if not is_renderer:
3241                     renderer = content.get('richItemRenderer')
3242                     if renderer:
3243                         for entry in self._rich_entries(renderer):
3244                             yield entry
3245                         continuation_list[0] = self._extract_continuation(parent_renderer)
3246                     continue
3247                 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3248                 for isr_content in isr_contents:
3249                     if not isinstance(isr_content, dict):
3250                         continue
3251                     renderer = isr_content.get('playlistVideoListRenderer')
3252                     if renderer:
3253                         for entry in self._playlist_entries(renderer):
3254                             yield entry
3255                         continuation_list[0] = self._extract_continuation(renderer)
3256                         continue
3257                     renderer = isr_content.get('gridRenderer')
3258                     if renderer:
3259                         for entry in self._grid_entries(renderer):
3260                             yield entry
3261                         continuation_list[0] = self._extract_continuation(renderer)
3262                         continue
3263                     renderer = isr_content.get('shelfRenderer')
3264                     if renderer:
3265                         is_channels_tab = tab.get('title') == 'Channels'
3266                         for entry in self._shelf_entries(renderer, not is_channels_tab):
3267                             yield entry
3268                         continue
3269                     renderer = isr_content.get('backstagePostThreadRenderer')
3270                     if renderer:
3271                         for entry in self._post_thread_entries(renderer):
3272                             yield entry
3273                         continuation_list[0] = self._extract_continuation(renderer)
3274                         continue
3275                     renderer = isr_content.get('videoRenderer')
3276                     if renderer:
3277                         entry = self._video_entry(renderer)
3278                         if entry:
3279                             yield entry
3280
3281                 if not continuation_list[0]:
3282                     continuation_list[0] = self._extract_continuation(is_renderer)
3283
3284             if not continuation_list[0]:
3285                 continuation_list[0] = self._extract_continuation(parent_renderer)
3286
3287         continuation_list = [None]  # Python 2 doesnot support nonlocal
3288         tab_content = try_get(tab, lambda x: x['content'], dict)
3289         if not tab_content:
3290             return
3291         parent_renderer = (
3292             try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3293             or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3294         for entry in extract_entries(parent_renderer):
3295             yield entry
3296         continuation = continuation_list[0]
3297
3298         headers = {
3299             'x-youtube-client-name': '1',
3300             'x-youtube-client-version': '2.20201112.04.01',
3301         }
3302         if identity_token:
3303             headers['x-youtube-identity-token'] = identity_token
3304
3305         for page_num in itertools.count(1):
3306             if not continuation:
3307                 break
3308             count = 0
3309             retries = 3
3310             while count <= retries:
3311                 try:
3312                     # Downloading page may result in intermittent 5xx HTTP error
3313                     # that is usually worked around with a retry
3314                     browse = self._download_json(
3315                         'https://www.youtube.com/browse_ajax', None,
3316                         'Downloading page %d%s'
3317                         % (page_num, ' (retry #%d)' % count if count else ''),
3318                         headers=headers, query=continuation)
3319                     break
3320                 except ExtractorError as e:
3321                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
3322                         count += 1
3323                         if count <= retries:
3324                             continue
3325                     raise
3326             if not browse:
3327                 break
3328             response = try_get(browse, lambda x: x[1]['response'], dict)
3329             if not response:
3330                 break
3331
3332             continuation_contents = try_get(
3333                 response, lambda x: x['continuationContents'], dict)
3334             if continuation_contents:
3335                 continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
3336                 if continuation_renderer:
3337                     for entry in self._playlist_entries(continuation_renderer):
3338                         yield entry
3339                     continuation = self._extract_continuation(continuation_renderer)
3340                     continue
3341                 continuation_renderer = continuation_contents.get('gridContinuation')
3342                 if continuation_renderer:
3343                     for entry in self._grid_entries(continuation_renderer):
3344                         yield entry
3345                     continuation = self._extract_continuation(continuation_renderer)
3346                     continue
3347                 continuation_renderer = continuation_contents.get('itemSectionContinuation')
3348                 if continuation_renderer:
3349                     for entry in self._post_thread_continuation_entries(continuation_renderer):
3350                         yield entry
3351                     continuation = self._extract_continuation(continuation_renderer)
3352                     continue
3353                 continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
3354                 if continuation_renderer:
3355                     continuation_list = [None]
3356                     for entry in extract_entries(continuation_renderer):
3357                         yield entry
3358                     continuation = continuation_list[0]
3359                     continue
3360
3361             continuation_items = try_get(
3362                 response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
3363             if continuation_items:
3364                 continuation_item = continuation_items[0]
3365                 if not isinstance(continuation_item, dict):
3366                     continue
3367                 renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
3368                 if renderer:
3369                     video_list_renderer = {'contents': continuation_items}
3370                     for entry in self._playlist_entries(video_list_renderer):
3371                         yield entry
3372                     continuation = self._extract_continuation(video_list_renderer)
3373                     continue
3374             break
3375
3376     @staticmethod
3377     def _extract_selected_tab(tabs):
3378         for tab in tabs:
3379             if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3380                 return tab['tabRenderer']
3381         else:
3382             raise ExtractorError('Unable to find selected tab')
3383
3384     @staticmethod
3385     def _extract_uploader(data):
3386         uploader = {}
3387         sidebar_renderer = try_get(
3388             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3389         if sidebar_renderer:
3390             for item in sidebar_renderer:
3391                 if not isinstance(item, dict):
3392                     continue
3393                 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3394                 if not isinstance(renderer, dict):
3395                     continue
3396                 owner = try_get(
3397                     renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3398                 if owner:
3399                     uploader['uploader'] = owner.get('text')
3400                     uploader['uploader_id'] = try_get(
3401                         owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3402                     uploader['uploader_url'] = urljoin(
3403                         'https://www.youtube.com/',
3404                         try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3405         return uploader
3406
3407     def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3408         selected_tab = self._extract_selected_tab(tabs)
3409         renderer = try_get(
3410             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3411         playlist_id = title = description = None
3412         if renderer:
3413             channel_title = renderer.get('title') or item_id
3414             tab_title = selected_tab.get('title')
3415             title = channel_title or item_id
3416             if tab_title:
3417                 title += ' - %s' % tab_title
3418             description = renderer.get('description')
3419             playlist_id = renderer.get('externalId')
3420
3421         # this has thumbnails, but there is currently no thumbnail field for playlists
3422         # sidebar.playlistSidebarRenderer has even more data, but its stucture is more complec
3423         renderer = try_get(
3424             data, lambda x: x['microformat']['microformatDataRenderer'], dict)
3425         if not renderer:
3426             renderer = try_get(
3427                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3428         if renderer:
3429             title = renderer.get('title')
3430             description = renderer.get('description')
3431             playlist_id = item_id
3432
3433         if playlist_id is None:
3434             playlist_id = item_id
3435         if title is None:
3436             title = "Youtube " + playlist_id.title()
3437         playlist = self.playlist_result(
3438             self._entries(selected_tab, identity_token),
3439             playlist_id=playlist_id, playlist_title=title,
3440             playlist_description=description)
3441         playlist.update(self._extract_uploader(data))
3442         return playlist
3443
3444     def _extract_from_playlist(self, item_id, url, data, playlist):
3445         title = playlist.get('title') or try_get(
3446             data, lambda x: x['titleText']['simpleText'], compat_str)
3447         playlist_id = playlist.get('playlistId') or item_id
3448         # Inline playlist rendition continuation does not always work
3449         # at Youtube side, so delegating regular tab-based playlist URL
3450         # processing whenever possible.
3451         playlist_url = urljoin(url, try_get(
3452             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3453             compat_str))
3454         if playlist_url and playlist_url != url:
3455             return self.url_result(
3456                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3457                 video_title=title)
3458         return self.playlist_result(
3459             self._playlist_entries(playlist), playlist_id=playlist_id,
3460             playlist_title=title)
3461
3462     @staticmethod
3463     def _extract_alerts(data):
3464         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3465             if not isinstance(alert_dict, dict):
3466                 continue
3467             for renderer in alert_dict:
3468                 alert = alert_dict[renderer]
3469                 alert_type = alert.get('type')
3470                 if not alert_type:
3471                     continue
3472                 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3473                 if message:
3474                     yield alert_type, message
3475                 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3476                     message = try_get(run, lambda x: x['text'], compat_str)
3477                     if message:
3478                         yield alert_type, message
3479
3480     def _extract_identity_token(self, webpage, item_id):
3481         ytcfg = self._extract_ytcfg(item_id, webpage)
3482         if ytcfg:
3483             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3484             if token:
3485                 return token
3486         return self._search_regex(
3487             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3488             'identity token', default=None)
3489
    def _real_extract(self, url):
        """Extract a tab page, inline playlist or single video from url.

        The URL is normalised to www.youtube.com first. Depending on what
        the downloaded page contains, the result comes from the tabs, from
        the inline watch-page playlist, or is a url_result handing a single
        video over to YoutubeIE.

        Raises ExtractorError when YouTube reports an error alert or when
        the page cannot be recognised.
        """
        item_id = self._match_id(url)
        # Force the canonical host, whatever host the URL was given with
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # 'pre' is the part matched by _VALID_URL; 'post' is an optional '/'
        # followed by whatever starts at the query/fragment (or nothing)
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        # NOTE(review): the 'not_channel' group is expected to be defined by
        # _VALID_URL, which is not visible here - confirm
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            # Redirect the bare channel/user URL to its /videos tab
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # A watch URL without a video id is only usable through its playlist
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            # With --no-playlist only the video is handed over to YoutubeIE
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Alerts: every non-error alert is reported as a warning. Of several
        # error alerts, all but the last are reported as warnings and the
        # last one is raised.
        err_msg = None
        for alert_type, alert_message in self._extract_alerts(data):
            if alert_type.lower() == 'error':
                if err_msg:
                    self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
                err_msg = alert_message
            else:
                self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if err_msg:
            raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
        # Tab-based page (the tabs of a twoColumnBrowseResultsRenderer)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        # Watch page carrying an inline playlist
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3549
3550
class YoutubePlaylistIE(InfoExtractor):
    """Redirect bare playlist ids and list= URLs to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that YoutubeTabIE already claims."""
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Hand the playlist over to YoutubeTabIE as a /playlist URL.

        The query string of the input URL is carried over as is; when
        there is none, a query holding only the playlist id is used.
        """
        playlist_id = self._match_id(url)
        query = (
            compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            or {'list': playlist_id})
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3625
3626
class YoutubeYtBeIE(InfoExtractor):
    """Redirect youtu.be links that carry a list= parameter to YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL with v= and list=."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3665
3666
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:<name>" keyword to the user's page URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate https://www.youtube.com/user/<name> to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3680
3681
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ":ytfav" shortcut to the 'LL' playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the playlist?list=LL URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3699
3700
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value for the "params" field of the search request body.
    # A falsy value means the field is not sent at all.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n search results for query, page by page.

        Each yielded item is whatever self._extract_video() returns for a
        'videoRenderer' entry that carries a 'videoId'.

        query -- the search string, sent as the "query" field of the request
        n     -- maximum number of results to yield; may be float('inf')

        Iteration stops when n results were yielded, when a page cannot be
        downloaded (the download is non-fatal), when a page has no
        recognisable contents, or when no continuation token is found.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first lambda matches the layout of the initial response,
            # the second one the layout of continuation responses.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # Look for the token BEFORE the item-section check below.
                # The entry holding 'continuationItemRenderer' has no
                # 'itemSectionRenderer', so checking afterwards would always
                # skip it and the search would end after the first page.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    # Anything that is not a video renderer with an id
                    # (e.g. promoted content) is ignored.
                    video = try_get(content, lambda x: x['videoRenderer'], dict)
                    if not video or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    total += 1
                    # ">=" rather than "==" so a non-positive limit cannot
                    # turn into an unbounded crawl.
                    if total >= n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # The query is used as both playlist id and playlist title; the
        # search-URL extractor's test expects a 'title' in the result.
        return self.playlist_result(self._entries(query, n), query, query)
3781
3782
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search machinery as the parent class, registered under its own
    # search keyword and name.
    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Sent as the "params" field of the search request by the parent class;
    # presumably this value selects the newest-first ordering (see IE_DESC).
    _SEARCH_PARAMS = 'CAI%3D'
3788
3789
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    @classmethod
    def _make_valid_url(cls):
        """Return this class' own URL pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by a results-page URL.

        The query comes from the 'search_query' (or, failing that, 'q')
        parameter; the optional 'sp' parameter is stored as the search
        params for the request.
        """
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query_values = url_params.get('search_query') or url_params.get('q')
        query = query_values[0]
        # An absent 'sp' yields an empty string, i.e. no search params.
        sp_values = url_params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3815
3816
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Shared base for the feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    # _MAX_PAGES = 5
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Log in before any extraction takes place.
        self._login()

    @property
    def IE_NAME(self):
        # Derived from the feed name supplied by the concrete subclass.
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Point at the feed page and let the tab extractor handle it."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        tab_ie_key = YoutubeTabIE.ie_key()
        return self.url_result(feed_url, ie=tab_ie_key)
3837
3838
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Hand the "WL" playlist URL over to the tab extractor."""
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        tab_ie_key = YoutubeTabIE.ie_key()
        return self.url_result(playlist_url, ie=tab_ie_key)
3851
3852
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # The base class builds both IE_NAME and the feed URL from this name.
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as the shortcuts.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
3867
3868
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # The base class builds both IE_NAME and the feed URL from this name.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
3880
3881
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # The base class builds both IE_NAME and the feed URL from this name.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
3890
3891
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Watch/attribution URLs that end without any video id; "$" anchors the
    # match to the end of the URL.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?feature=foo', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?hl=en-GB', 'only_matching': True},
        {'url': 'https://www.youtube.com/watch?t=2372', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL in the shell."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        # expected=True marks this as an anticipated (user-side) error.
        raise ExtractorError(quoting_hint, expected=True)
3939
3940
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Watch URLs whose "v" value is only 1 to 10 characters long.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always fail, reporting the incomplete id and the offending URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        # expected=True marks this as an anticipated (user-side) error.
        raise ExtractorError(message, expected=True)
3956
3957
# NOTE: Do YouTube show URLs even exist anymore? None could be found, so the
# show extractor below is kept disabled inside a raw string literal.
3959 r'''
3960 class YoutubeShowIE(YoutubeTabIE):
3961     IE_DESC = 'YouTube.com (multi-season) shows'
3962     _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3963     IE_NAME = 'youtube:show'
3964     _TESTS = [{
3965         'url': 'https://www.youtube.com/show/airdisasters',
3966         'playlist_mincount': 5,
3967         'info_dict': {
3968             'id': 'airdisasters',
3969             'title': 'Air Disasters',
3970         }
3971     }]
3972
3973     def _real_extract(self, url):
3974         playlist_id = self._match_id(url)
3975         return super(YoutubeShowIE, self)._real_extract(
3976             'https://www.youtube.com/show/%s/playlists' % playlist_id)
3977 '''