# Source: youtube_dlc/extractor/youtube.py (retrieved from the yt-dlp git mirror at jfr.im)
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_kwargs,
20 compat_parse_qs,
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
25 compat_urlparse,
26 compat_str,
27 )
28 from ..utils import (
29 bool_or_none,
30 clean_html,
31 error_to_compat_str,
32 ExtractorError,
33 float_or_none,
34 get_element_by_id,
35 int_or_none,
36 mimetype2ext,
37 orderedSet,
38 parse_codecs,
39 parse_count,
40 parse_duration,
41 remove_quotes,
42 remove_start,
43 smuggle_url,
44 str_or_none,
45 str_to_int,
46 try_get,
47 unescapeHTML,
48 unified_strdate,
49 unsmuggle_url,
50 update_url_query,
51 uppercase_escape,
52 url_or_none,
53 urlencode_postdata,
54 urljoin,
55 )
56
57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints used by _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Internal ("/_/signin/sl/") endpoints of the reverse-engineered
    # GlifWebSignIn flow: account lookup, password challenge, and the
    # two-factor challenge URL (parameterized by the "TL" token).
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Machine name looked up in the user's .netrc file for credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches YouTube playlist IDs (known prefixes + base64-ish tail) or
    # the special "RDMM" mix playlist.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'

    # Headers identifying requests as coming from the desktop web client.
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Set the PREF cookie so YouTube serves English-language pages."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        NOTE(review): this implements a reverse-engineered Google sign-in
        protocol (GlifWebSignIn).  The request/response payloads are opaque
        positional JSON arrays; the index meanings below are inferred from
        observed traffic and may break whenever Google changes the flow.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow; f_req is the opaque
            # positional payload serialized into the 'f.req' form field.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Responses are prefixed with an anti-XSSI guard (e.g. ")]}'");
                # strip everything before the first '[' so json can parse it.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            # Report non-fatal login problems without aborting extraction.
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # Presumably an opaque per-account token required by the password
        # challenge step — extracted positionally, TODO confirm semantics.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        # A non-empty entry here signals a login error (e.g. bad password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # If present, the account requires an additional challenge
        # (two-step verification or an interactive security check).
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token that parameterizes the TFA submission URL.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users often paste codes with the SMS "G-" prefix; drop it.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                # Same error-slot convention as the password challenge above.
                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved non-interactively; map the
                # known codes to human-readable explanations.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Visiting the CheckCookie URL finalizes the session; a redirect to
        # myaccount.google.com in the response indicates success.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Wrap the base downloader, passing a private copy of the query dict."""
        # Copy so later mutation of the caller's dict cannot affect the request
        # (and vice versa).
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob, or None if absent."""
        config = self._search_regex(
            # Lookbehind (?<=}) ensures the lazy match ends at a closing brace,
            # i.e. at the end of the JSON object rather than mid-string.
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        """Set language cookie and log in before any extraction is performed."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Default request body for InnerTube API calls: identifies us as the
    # desktop WEB client.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    def _call_api(self, ep, query, video_id):
        """POST to the InnerTube endpoint `ep` and return the parsed JSON.

        `query` is merged over _DEFAULT_API_DATA to form the JSON body.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            # Public API key used by the YouTube web client itself.
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Extract ytInitialData from a watch page; raises if not found
        (unlike the lenient _get_yt_initial_data above)."""
        return self._parse_json(
            self._search_regex(
                r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
                webpage, 'yt initial data'),
            video_id)
325
326 class YoutubeIE(YoutubeBaseInfoExtractor):
327 IE_DESC = 'YouTube.com'
328 _VALID_URL = r"""(?x)^
329 (
330 (?:https?://|//) # http(s):// or protocol-independent URL
331 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
332 (?:www\.)?deturl\.com/www\.youtube\.com/|
333 (?:www\.)?pwnyoutube\.com/|
334 (?:www\.)?hooktube\.com/|
335 (?:www\.)?yourepeat\.com/|
336 tube\.majestyc\.net/|
337 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
338 (?:(?:www|dev)\.)?invidio\.us/|
339 (?:(?:www|no)\.)?invidiou\.sh/|
340 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
341 (?:www\.)?invidious\.kabi\.tk/|
342 (?:www\.)?invidious\.13ad\.de/|
343 (?:www\.)?invidious\.mastodon\.host/|
344 (?:www\.)?invidious\.nixnet\.xyz/|
345 (?:www\.)?invidious\.drycat\.fr/|
346 (?:www\.)?tube\.poal\.co/|
347 (?:www\.)?vid\.wxzm\.sx/|
348 (?:www\.)?yewtu\.be/|
349 (?:www\.)?yt\.elukerio\.org/|
350 (?:www\.)?yt\.lelux\.fi/|
351 (?:www\.)?invidious\.ggc-project\.de/|
352 (?:www\.)?yt\.maisputain\.ovh/|
353 (?:www\.)?invidious\.13ad\.de/|
354 (?:www\.)?invidious\.toot\.koeln/|
355 (?:www\.)?invidious\.fdn\.fr/|
356 (?:www\.)?watch\.nettohikari\.com/|
357 (?:www\.)?kgg2m7yk5aybusll\.onion/|
358 (?:www\.)?qklhadlycap4cnod\.onion/|
359 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
360 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
361 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
362 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
363 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
364 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
365 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
366 (?:.*?\#/)? # handle anchor (#/) redirect urls
367 (?: # the various things that can precede the ID:
368 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
369 |(?: # or the v= param in all its forms
370 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
371 (?:\?|\#!?) # the params delimiter ? or # or #!
372 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
373 v=
374 )
375 ))
376 |(?:
377 youtu\.be| # just youtu.be/xxxx
378 vid\.plus| # or vid.plus/xxxx
379 zwearz\.com/watch| # or zwearz.com/watch/xxxx
380 )/
381 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
382 )
383 )? # all until now is optional -> you can pass the naked ID
384 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
385 (?!.*?\blist=
386 (?:
387 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
388 WL # WL are handled by the watch later IE
389 )
390 )
391 (?(1).+)? # if we found the ID, everything can follow
392 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
393 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
394 _PLAYER_INFO_RE = (
395 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
396 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
397 )
398 _formats = {
399 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
400 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
401 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
402 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
403 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
404 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
405 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
406 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
407 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
408 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
409 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
410 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
411 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
412 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
413 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
414 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
415 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
416 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
417
418
419 # 3D videos
420 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
421 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
422 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
423 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
424 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
425 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
426 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
427
428 # Apple HTTP Live Streaming
429 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
430 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
431 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
432 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
433 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
434 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
435 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
436 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
437
438 # DASH mp4 video
439 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
440 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
441 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
442 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
443 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
444 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
445 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
446 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
447 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
448 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
449 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
450 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
451
452 # Dash mp4 audio
453 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
454 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
455 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
456 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
457 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
458 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
459 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
460
461 # Dash webm
462 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
463 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
464 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
465 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
466 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
467 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
468 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
469 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
470 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
471 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
472 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
473 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
474 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
475 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
476 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
477 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
478 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
479 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
480 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
481 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
482 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
483 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
484
485 # Dash webm audio
486 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
487 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
488
489 # Dash webm audio with opus inside
490 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
491 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
492 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
493
494 # RTMP (unnamed)
495 '_rtmp': {'protocol': 'rtmp'},
496
497 # av01 video only formats sometimes served with "unknown" codecs
498 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
499 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
500 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
501 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
502 }
503 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
504
505 _GEO_BYPASS = False
506
507 IE_NAME = 'youtube'
508 _TESTS = [
509 {
510 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
511 'info_dict': {
512 'id': 'BaW_jenozKc',
513 'ext': 'mp4',
514 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
515 'uploader': 'Philipp Hagemeister',
516 'uploader_id': 'phihag',
517 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
518 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
519 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
520 'upload_date': '20121002',
521 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
522 'categories': ['Science & Technology'],
523 'tags': ['youtube-dl'],
524 'duration': 10,
525 'view_count': int,
526 'like_count': int,
527 'dislike_count': int,
528 'start_time': 1,
529 'end_time': 9,
530 }
531 },
532 {
533 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
534 'note': 'Embed-only video (#1746)',
535 'info_dict': {
536 'id': 'yZIXLfi8CZQ',
537 'ext': 'mp4',
538 'upload_date': '20120608',
539 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
540 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
541 'uploader': 'SET India',
542 'uploader_id': 'setindia',
543 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
544 'age_limit': 18,
545 }
546 },
547 {
548 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
549 'note': 'Use the first video ID in the URL',
550 'info_dict': {
551 'id': 'BaW_jenozKc',
552 'ext': 'mp4',
553 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
554 'uploader': 'Philipp Hagemeister',
555 'uploader_id': 'phihag',
556 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
557 'upload_date': '20121002',
558 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
559 'categories': ['Science & Technology'],
560 'tags': ['youtube-dl'],
561 'duration': 10,
562 'view_count': int,
563 'like_count': int,
564 'dislike_count': int,
565 },
566 'params': {
567 'skip_download': True,
568 },
569 },
570 {
571 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
572 'note': '256k DASH audio (format 141) via DASH manifest',
573 'info_dict': {
574 'id': 'a9LDPn-MO4I',
575 'ext': 'm4a',
576 'upload_date': '20121002',
577 'uploader_id': '8KVIDEO',
578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
579 'description': '',
580 'uploader': '8KVIDEO',
581 'title': 'UHDTV TEST 8K VIDEO.mp4'
582 },
583 'params': {
584 'youtube_include_dash_manifest': True,
585 'format': '141',
586 },
587 'skip': 'format 141 not served anymore',
588 },
589 # DASH manifest with encrypted signature
590 {
591 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
592 'info_dict': {
593 'id': 'IB3lcPjvWLA',
594 'ext': 'm4a',
595 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
596 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
597 'duration': 244,
598 'uploader': 'AfrojackVEVO',
599 'uploader_id': 'AfrojackVEVO',
600 'upload_date': '20131011',
601 },
602 'params': {
603 'youtube_include_dash_manifest': True,
604 'format': '141/bestaudio[ext=m4a]',
605 },
606 },
607 # Controversy video
608 {
609 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
610 'info_dict': {
611 'id': 'T4XJQO3qol8',
612 'ext': 'mp4',
613 'duration': 219,
614 'upload_date': '20100909',
615 'uploader': 'Amazing Atheist',
616 'uploader_id': 'TheAmazingAtheist',
617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
618 'title': 'Burning Everyone\'s Koran',
619 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
620 }
621 },
622 # Normal age-gate video (embed allowed)
623 {
624 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
625 'info_dict': {
626 'id': 'HtVdAasjOgU',
627 'ext': 'mp4',
628 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
629 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
630 'duration': 142,
631 'uploader': 'The Witcher',
632 'uploader_id': 'WitcherGame',
633 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
634 'upload_date': '20140605',
635 'age_limit': 18,
636 },
637 },
638 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
639 # YouTube Red ad is not captured for creator
640 {
641 'url': '__2ABJjxzNo',
642 'info_dict': {
643 'id': '__2ABJjxzNo',
644 'ext': 'mp4',
645 'duration': 266,
646 'upload_date': '20100430',
647 'uploader_id': 'deadmau5',
648 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
649 'creator': 'Dada Life, deadmau5',
650 'description': 'md5:12c56784b8032162bb936a5f76d55360',
651 'uploader': 'deadmau5',
652 'title': 'Deadmau5 - Some Chords (HD)',
653 'alt_title': 'This Machine Kills Some Chords',
654 },
655 'expected_warnings': [
656 'DASH manifest missing',
657 ]
658 },
659 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
660 {
661 'url': 'lqQg6PlCWgI',
662 'info_dict': {
663 'id': 'lqQg6PlCWgI',
664 'ext': 'mp4',
665 'duration': 6085,
666 'upload_date': '20150827',
667 'uploader_id': 'olympic',
668 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
669 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
670 'uploader': 'Olympic',
671 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
672 },
673 'params': {
674 'skip_download': 'requires avconv',
675 }
676 },
677 # Non-square pixels
678 {
679 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
680 'info_dict': {
681 'id': '_b-2C3KPAM0',
682 'ext': 'mp4',
683 'stretched_ratio': 16 / 9.,
684 'duration': 85,
685 'upload_date': '20110310',
686 'uploader_id': 'AllenMeow',
687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
688 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
689 'uploader': '孫ᄋᄅ',
690 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
691 },
692 },
693 # url_encoded_fmt_stream_map is empty string
694 {
695 'url': 'qEJwOuvDf7I',
696 'info_dict': {
697 'id': 'qEJwOuvDf7I',
698 'ext': 'webm',
699 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
700 'description': '',
701 'upload_date': '20150404',
702 'uploader_id': 'spbelect',
703 'uploader': 'Наблюдатели Петербурга',
704 },
705 'params': {
706 'skip_download': 'requires avconv',
707 },
708 'skip': 'This live event has ended.',
709 },
710 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
711 {
712 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
713 'info_dict': {
714 'id': 'FIl7x6_3R5Y',
715 'ext': 'webm',
716 'title': 'md5:7b81415841e02ecd4313668cde88737a',
717 'description': 'md5:116377fd2963b81ec4ce64b542173306',
718 'duration': 220,
719 'upload_date': '20150625',
720 'uploader_id': 'dorappi2000',
721 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
722 'uploader': 'dorappi2000',
723 'formats': 'mincount:31',
724 },
725 'skip': 'not actual anymore',
726 },
727 # DASH manifest with segment_list
728 {
729 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
730 'md5': '8ce563a1d667b599d21064e982ab9e31',
731 'info_dict': {
732 'id': 'CsmdDsKjzN8',
733 'ext': 'mp4',
734 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
735 'uploader': 'Airtek',
736 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
737 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
738 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
739 },
740 'params': {
741 'youtube_include_dash_manifest': True,
742 'format': '135', # bestvideo
743 },
744 'skip': 'This live event has ended.',
745 },
746 {
747 # Multifeed videos (multiple cameras), URL is for Main Camera
748 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
749 'info_dict': {
750 'id': 'jqWvoWXjCVs',
751 'title': 'teamPGP: Rocket League Noob Stream',
752 'description': 'md5:dc7872fb300e143831327f1bae3af010',
753 },
754 'playlist': [{
755 'info_dict': {
756 'id': 'jqWvoWXjCVs',
757 'ext': 'mp4',
758 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
759 'description': 'md5:dc7872fb300e143831327f1bae3af010',
760 'duration': 7335,
761 'upload_date': '20150721',
762 'uploader': 'Beer Games Beer',
763 'uploader_id': 'beergamesbeer',
764 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
765 'license': 'Standard YouTube License',
766 },
767 }, {
768 'info_dict': {
769 'id': '6h8e8xoXJzg',
770 'ext': 'mp4',
771 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
772 'description': 'md5:dc7872fb300e143831327f1bae3af010',
773 'duration': 7337,
774 'upload_date': '20150721',
775 'uploader': 'Beer Games Beer',
776 'uploader_id': 'beergamesbeer',
777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
778 'license': 'Standard YouTube License',
779 },
780 }, {
781 'info_dict': {
782 'id': 'PUOgX5z9xZw',
783 'ext': 'mp4',
784 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
785 'description': 'md5:dc7872fb300e143831327f1bae3af010',
786 'duration': 7337,
787 'upload_date': '20150721',
788 'uploader': 'Beer Games Beer',
789 'uploader_id': 'beergamesbeer',
790 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
791 'license': 'Standard YouTube License',
792 },
793 }, {
794 'info_dict': {
795 'id': 'teuwxikvS5k',
796 'ext': 'mp4',
797 'title': 'teamPGP: Rocket League Noob Stream (zim)',
798 'description': 'md5:dc7872fb300e143831327f1bae3af010',
799 'duration': 7334,
800 'upload_date': '20150721',
801 'uploader': 'Beer Games Beer',
802 'uploader_id': 'beergamesbeer',
803 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
804 'license': 'Standard YouTube License',
805 },
806 }],
807 'params': {
808 'skip_download': True,
809 },
810 'skip': 'This video is not available.',
811 },
812 {
813 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
814 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
815 'info_dict': {
816 'id': 'gVfLd0zydlo',
817 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
818 },
819 'playlist_count': 2,
820 'skip': 'Not multifeed anymore',
821 },
822 {
823 'url': 'https://vid.plus/FlRa-iH7PGw',
824 'only_matching': True,
825 },
826 {
827 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
828 'only_matching': True,
829 },
830 {
831 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
832 # Also tests cut-off URL expansion in video description (see
833 # https://github.com/ytdl-org/youtube-dl/issues/1892,
834 # https://github.com/ytdl-org/youtube-dl/issues/8164)
835 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
836 'info_dict': {
837 'id': 'lsguqyKfVQg',
838 'ext': 'mp4',
839 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
840 'alt_title': 'Dark Walk - Position Music',
841 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
842 'duration': 133,
843 'upload_date': '20151119',
844 'uploader_id': 'IronSoulElf',
845 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
846 'uploader': 'IronSoulElf',
847 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
848 'track': 'Dark Walk - Position Music',
849 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
850 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
851 },
852 'params': {
853 'skip_download': True,
854 },
855 },
856 {
857 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
858 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
859 'only_matching': True,
860 },
861 {
862 # Video with yt:stretch=17:0
863 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
864 'info_dict': {
865 'id': 'Q39EVAstoRM',
866 'ext': 'mp4',
867 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
868 'description': 'md5:ee18a25c350637c8faff806845bddee9',
869 'upload_date': '20151107',
870 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
871 'uploader': 'CH GAMER DROID',
872 },
873 'params': {
874 'skip_download': True,
875 },
876 'skip': 'This video does not exist.',
877 },
878 {
879 # Video licensed under Creative Commons
880 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
881 'info_dict': {
882 'id': 'M4gD1WSo5mA',
883 'ext': 'mp4',
884 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
885 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
886 'duration': 721,
887 'upload_date': '20150127',
888 'uploader_id': 'BerkmanCenter',
889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
890 'uploader': 'The Berkman Klein Center for Internet & Society',
891 'license': 'Creative Commons Attribution license (reuse allowed)',
892 },
893 'params': {
894 'skip_download': True,
895 },
896 },
897 {
898 # Channel-like uploader_url
899 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
900 'info_dict': {
901 'id': 'eQcmzGIKrzg',
902 'ext': 'mp4',
903 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
904 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
905 'duration': 4060,
906 'upload_date': '20151119',
907 'uploader': 'Bernie Sanders',
908 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
910 'license': 'Creative Commons Attribution license (reuse allowed)',
911 },
912 'params': {
913 'skip_download': True,
914 },
915 },
916 {
917 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
918 'only_matching': True,
919 },
920 {
921 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
922 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
923 'only_matching': True,
924 },
925 {
926 # Rental video preview
927 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
928 'info_dict': {
929 'id': 'uGpuVWrhIzE',
930 'ext': 'mp4',
931 'title': 'Piku - Trailer',
932 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
933 'upload_date': '20150811',
934 'uploader': 'FlixMatrix',
935 'uploader_id': 'FlixMatrixKaravan',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
937 'license': 'Standard YouTube License',
938 },
939 'params': {
940 'skip_download': True,
941 },
942 'skip': 'This video is not available.',
943 },
944 {
945 # YouTube Red video with episode data
946 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
947 'info_dict': {
948 'id': 'iqKdEhx-dD4',
949 'ext': 'mp4',
950 'title': 'Isolation - Mind Field (Ep 1)',
951 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
952 'duration': 2085,
953 'upload_date': '20170118',
954 'uploader': 'Vsauce',
955 'uploader_id': 'Vsauce',
956 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
957 'series': 'Mind Field',
958 'season_number': 1,
959 'episode_number': 1,
960 },
961 'params': {
962 'skip_download': True,
963 },
964 'expected_warnings': [
965 'Skipping DASH manifest',
966 ],
967 },
968 {
969 # The following content has been identified by the YouTube community
970 # as inappropriate or offensive to some audiences.
971 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
972 'info_dict': {
973 'id': '6SJNVb0GnPI',
974 'ext': 'mp4',
975 'title': 'Race Differences in Intelligence',
976 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
977 'duration': 965,
978 'upload_date': '20140124',
979 'uploader': 'New Century Foundation',
980 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
981 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
987 {
988 # itag 212
989 'url': '1t24XAntNCY',
990 'only_matching': True,
991 },
992 {
993 # geo restricted to JP
994 'url': 'sJL6WA-aGkQ',
995 'only_matching': True,
996 },
997 {
998 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
999 'only_matching': True,
1000 },
1001 {
1002 # DRM protected
1003 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1004 'only_matching': True,
1005 },
1006 {
1007 # Video with unsupported adaptive stream type formats
1008 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1009 'info_dict': {
1010 'id': 'Z4Vy8R84T1U',
1011 'ext': 'mp4',
1012 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1013 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1014 'duration': 433,
1015 'upload_date': '20130923',
1016 'uploader': 'Amelia Putri Harwita',
1017 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1019 'formats': 'maxcount:10',
1020 },
1021 'params': {
1022 'skip_download': True,
1023 'youtube_include_dash_manifest': False,
1024 },
1025 'skip': 'not actual anymore',
1026 },
1027 {
1028 # Youtube Music Auto-generated description
1029 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1030 'info_dict': {
1031 'id': 'MgNrAu2pzNs',
1032 'ext': 'mp4',
1033 'title': 'Voyeur Girl',
1034 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1035 'upload_date': '20190312',
1036 'uploader': 'Stephen - Topic',
1037 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1038 'artist': 'Stephen',
1039 'track': 'Voyeur Girl',
1040 'album': 'it\'s too much love to know my dear',
1041 'release_date': '20190313',
1042 'release_year': 2019,
1043 },
1044 'params': {
1045 'skip_download': True,
1046 },
1047 },
1048 {
1049 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1050 'only_matching': True,
1051 },
1052 {
1053 # invalid -> valid video id redirection
1054 'url': 'DJztXj2GPfl',
1055 'info_dict': {
1056 'id': 'DJztXj2GPfk',
1057 'ext': 'mp4',
1058 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1059 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1060 'upload_date': '20090125',
1061 'uploader': 'Prochorowka',
1062 'uploader_id': 'Prochorowka',
1063 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1064 'artist': 'Panjabi MC',
1065 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1066 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1067 },
1068 'params': {
1069 'skip_download': True,
1070 },
1071 },
1072 {
1073 # empty description results in an empty string
1074 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1075 'info_dict': {
1076 'id': 'x41yOUIvK2k',
1077 'ext': 'mp4',
1078 'title': 'IMG 3456',
1079 'description': '',
1080 'upload_date': '20170613',
1081 'uploader_id': 'ElevageOrVert',
1082 'uploader': 'ElevageOrVert',
1083 },
1084 'params': {
1085 'skip_download': True,
1086 },
1087 },
1088 ]
1089
1090 def __init__(self, *args, **kwargs):
1091 super(YoutubeIE, self).__init__(*args, **kwargs)
1092 self._player_cache = {}
1093
1094 def report_video_info_webpage_download(self, video_id):
1095 """Report attempt to download video info webpage."""
1096 self.to_screen('%s: Downloading video info webpage' % video_id)
1097
1098 def report_information_extraction(self, video_id):
1099 """Report attempt to extract video information."""
1100 self.to_screen('%s: Extracting video information' % video_id)
1101
1102 def report_unavailable_format(self, video_id, format):
1103 """Report extracted video URL."""
1104 self.to_screen('%s: Format %s not available' % (video_id, format))
1105
1106 def report_rtmp_download(self):
1107 """Indicate the download will use the RTMP protocol."""
1108 self.to_screen('RTMP download detected')
1109
1110 def _signature_cache_id(self, example_sig):
1111 """ Return a string representation of a signature """
1112 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1113
1114 @classmethod
1115 def _extract_player_info(cls, player_url):
1116 for player_re in cls._PLAYER_INFO_RE:
1117 id_m = re.search(player_re, player_url)
1118 if id_m:
1119 break
1120 else:
1121 raise ExtractorError('Cannot identify player %r' % player_url)
1122 return id_m.group('ext'), id_m.group('id')
1123
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (or load from the on-disk cache) a callable that deciphers
        scrambled signatures produced by the player at player_url.

        example_sig is only used for its length layout (see
        _signature_cache_id), so a cached spec can be reused for any
        signature with the same part lengths.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id is used as a cache file name; make sure no path separators
        # slipped in via player id or signature layout
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of input indices: deciphering is a
            # fixed selection/permutation of the input characters.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Run the extracted function on a probe string of distinct characters
        # to record which input index ends up at each output position...
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        # ...and persist that index list so future runs skip the download.
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1163
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the deciphering function *func*
        (debugging aid, used with the youtube_print_sig_code option)."""
        def gen_sig_code(idxs):
            # Yield compact expressions ('s[3]', 's[8:2:-1]', ...) that
            # reproduce the index sequence idxs, merging runs whose step is
            # +1 or -1 into slices.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the still-open slice.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe with distinct characters to learn the output index layout.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1202
1203 def _parse_sig_js(self, jscode):
1204 funcname = self._search_regex(
1205 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1206 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1207 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1208 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1209 # Obsolete patterns
1210 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1211 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1212 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1213 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1214 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1215 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1216 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1217 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1218 jscode, 'Initial JS player signature function name', group='sig')
1219
1220 jsi = JSInterpreter(jscode)
1221 initial_function = jsi.extract_function(funcname)
1222 return lambda s: initial_function([s])
1223
1224 def _parse_sig_swf(self, file_contents):
1225 swfi = SWFInterpreter(file_contents)
1226 TARGET_CLASSNAME = 'SignatureDecipher'
1227 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1228 initial_function = swfi.extract_function(searched_class, 'decipher')
1229 return lambda s: initial_function([s])
1230
1231 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1232 """Turn the encrypted s field into a working signature"""
1233
1234 if player_url is None:
1235 raise ExtractorError('Cannot decrypt signature without player_url')
1236
1237 if player_url.startswith('//'):
1238 player_url = 'https:' + player_url
1239 elif not re.match(r'https?://', player_url):
1240 player_url = compat_urlparse.urljoin(
1241 'https://www.youtube.com', player_url)
1242 try:
1243 player_id = (player_url, self._signature_cache_id(s))
1244 if player_id not in self._player_cache:
1245 func = self._extract_signature_function(
1246 video_id, player_url, s
1247 )
1248 self._player_cache[player_id] = func
1249 func = self._player_cache[player_id]
1250 if self._downloader.params.get('youtube_print_sig_code'):
1251 self._print_sig_code(func, s)
1252 return func(s)
1253 except Exception as e:
1254 tb = traceback.format_exc()
1255 raise ExtractorError(
1256 'Signature extraction failed: ' + tb, cause=e)
1257
1258 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1259 try:
1260 subs_doc = self._download_xml(
1261 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1262 video_id, note=False)
1263 except ExtractorError as err:
1264 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1265 return {}
1266
1267 sub_lang_list = {}
1268 for track in subs_doc.findall('track'):
1269 lang = track.attrib['lang_code']
1270 if lang in sub_lang_list:
1271 continue
1272 sub_formats = []
1273 for ext in self._SUBTITLE_FORMATS:
1274 params = compat_urllib_parse_urlencode({
1275 'lang': lang,
1276 'v': video_id,
1277 'fmt': ext,
1278 'name': track.attrib['name'].encode('utf-8'),
1279 })
1280 sub_formats.append({
1281 'url': 'https://www.youtube.com/api/timedtext?' + params,
1282 'ext': ext,
1283 })
1284 sub_lang_list[lang] = sub_formats
1285 if has_live_chat_replay:
1286 sub_lang_list['live_chat'] = [
1287 {
1288 'video_id': video_id,
1289 'ext': 'json',
1290 'protocol': 'youtube_live_chat_replay',
1291 },
1292 ]
1293 if not sub_lang_list:
1294 self._downloader.report_warning('video doesn\'t have subtitles')
1295 return {}
1296 return sub_lang_list
1297
1298 def _get_ytplayer_config(self, video_id, webpage):
1299 patterns = (
1300 # User data may contain arbitrary character sequences that may affect
1301 # JSON extraction with regex, e.g. when '};' is contained the second
1302 # regex won't capture the whole JSON. Yet working around by trying more
1303 # concrete regex first keeping in mind proper quoted string handling
1304 # to be implemented in future that will replace this workaround (see
1305 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1306 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1307 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1308 r';ytplayer\.config\s*=\s*({.+?});',
1309 r'ytInitialPlayerResponse\s*=\s*({.+?});var meta' # Needed???
1310 )
1311 config = self._search_regex(
1312 patterns, webpage, 'ytplayer.config', default=None)
1313 if config:
1314 return self._parse_json(
1315 uppercase_escape(config), video_id, fatal=False)
1316
1317 def _get_music_metadata_from_yt_initial(self, yt_initial):
1318 music_metadata = []
1319 key_map = {
1320 'Album': 'album',
1321 'Artist': 'artist',
1322 'Song': 'track'
1323 }
1324 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1325 if type(contents) is list:
1326 for content in contents:
1327 music_track = {}
1328 if type(content) is not dict:
1329 continue
1330 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1331 if type(videoSecondaryInfoRenderer) is not dict:
1332 continue
1333 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1334 if type(rows) is not list:
1335 continue
1336 for row in rows:
1337 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1338 if type(metadataRowRenderer) is not dict:
1339 continue
1340 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1341 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1342 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1343 if type(key) is not str or type(value) is not str:
1344 continue
1345 if key in key_map:
1346 if key_map[key] in music_track:
1347 # we've started on a new track
1348 music_metadata.append(music_track)
1349 music_track = {}
1350 music_track[key_map[key]] = value
1351 if len(music_track.keys()):
1352 music_metadata.append(music_track)
1353 return music_metadata
1354
    def _get_automatic_captions(self, video_id, webpage):
        """Extract automatic (ASR) caption tracks for a video.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns a mapping of language code -> list of format dicts; an empty
        dict (with a warning) when no automatic captions can be found.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            # Legacy path: a direct timedtext service URL in the player args.
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One entry per translation target, each with every supported
                # subtitle format.
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the base
                # caption URL's query string for each (language, format) pair.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1456
1457 def _mark_watched(self, video_id, video_info, player_response):
1458 playback_url = url_or_none(try_get(
1459 player_response,
1460 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1461 video_info, lambda x: x['videostats_playback_base_url'][0]))
1462 if not playback_url:
1463 return
1464 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1465 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1466
1467 # cpn generation algorithm is reverse engineered from base.js.
1468 # In fact it works even with dummy cpn.
1469 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1470 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1471
1472 qs.update({
1473 'ver': ['2'],
1474 'cpn': [cpn],
1475 })
1476 playback_url = compat_urlparse.urlunparse(
1477 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1478
1479 self._download_webpage(
1480 playback_url, video_id, 'Marking watched',
1481 'Unable to mark watched', fatal=False)
1482
1483 @staticmethod
1484 def _extract_urls(webpage):
1485 # Embedded YouTube player
1486 entries = [
1487 unescapeHTML(mobj.group('url'))
1488 for mobj in re.finditer(r'''(?x)
1489 (?:
1490 <iframe[^>]+?src=|
1491 data-video-url=|
1492 <embed[^>]+?src=|
1493 embedSWF\(?:\s*|
1494 <object[^>]+data=|
1495 new\s+SWFObject\(
1496 )
1497 (["\'])
1498 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1499 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1500 \1''', webpage)]
1501
1502 # lazyYT YouTube embed
1503 entries.extend(list(map(
1504 unescapeHTML,
1505 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1506
1507 # Wordpress "YouTube Video Importer" plugin
1508 matches = re.findall(r'''(?x)<div[^>]+
1509 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1510 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1511 entries.extend(m[-1] for m in matches)
1512
1513 return entries
1514
1515 @staticmethod
1516 def _extract_url(webpage):
1517 urls = YoutubeIE._extract_urls(webpage)
1518 return urls[0] if urls else None
1519
1520 @classmethod
1521 def extract_id(cls, url):
1522 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1523 if mobj is None:
1524 raise ExtractorError('Invalid URL: %s' % url)
1525 video_id = mobj.group(2)
1526 return video_id
1527
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract chapter markers from the ytInitialData embedded in the
        watch page.

        Returns a list of {start_time, end_time, title} dicts, or None when
        the page carries no chapter data.
        """
        if not webpage:
            return
        data = self._extract_yt_initial_data(video_id, webpage)
        if not data or not isinstance(data, dict):
            return
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Start times come in milliseconds; convert to seconds.
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # A chapter ends where the next one begins; the last chapter ends
            # at the full video duration.
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1572
    @staticmethod
    def _extract_chapters_from_description(description, duration):
        """Parse chapter markers from seekTo anchor links in the (HTML)
        video description.

        Returns a list of {start_time, end_time, title} dicts, or None when
        the description contains no chapter lines.
        """
        if not description:
            return None
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
            description)
        if not chapter_lines:
            return None
        chapters = []
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
                continue
            # Chapters past the end of the video cannot be valid; as the
            # lines appear in order, stop entirely.
            if start_time > duration:
                break
            # A chapter ends where the next one begins; the last chapter ends
            # at the full video duration.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
                continue
            if end_time > duration:
                end_time = duration
            if start_time > end_time:
                break
            # Strip the seekTo anchor markup and surrounding separators to
            # get the plain chapter title, collapsing runs of whitespace.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
            })
        return chapters
1607
1608 def _extract_chapters(self, webpage, description, video_id, duration):
1609 return (self._extract_chapters_from_json(webpage, video_id, duration)
1610 or self._extract_chapters_from_description(description, duration))
1611
1612 def _real_extract(self, url):
1613 url, smuggled_data = unsmuggle_url(url, {})
1614
1615 proto = (
1616 'http' if self._downloader.params.get('prefer_insecure', False)
1617 else 'https')
1618
1619 start_time = None
1620 end_time = None
1621 parsed_url = compat_urllib_parse_urlparse(url)
1622 for component in [parsed_url.fragment, parsed_url.query]:
1623 query = compat_parse_qs(component)
1624 if start_time is None and 't' in query:
1625 start_time = parse_duration(query['t'][0])
1626 if start_time is None and 'start' in query:
1627 start_time = parse_duration(query['start'][0])
1628 if end_time is None and 'end' in query:
1629 end_time = parse_duration(query['end'][0])
1630
1631 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1632 mobj = re.search(self._NEXT_URL_RE, url)
1633 if mobj:
1634 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1635 video_id = self.extract_id(url)
1636
1637 # Get video webpage
1638 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1639 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1640
1641 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1642 video_id = qs.get('v', [None])[0] or video_id
1643
1644 # Attempt to extract SWF player URL
1645 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1646 if mobj is not None:
1647 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1648 else:
1649 player_url = None
1650
1651 dash_mpds = []
1652
1653 def add_dash_mpd(video_info):
1654 dash_mpd = video_info.get('dashmpd')
1655 if dash_mpd and dash_mpd[0] not in dash_mpds:
1656 dash_mpds.append(dash_mpd[0])
1657
1658 def add_dash_mpd_pr(pl_response):
1659 dash_mpd = url_or_none(try_get(
1660 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1661 compat_str))
1662 if dash_mpd and dash_mpd not in dash_mpds:
1663 dash_mpds.append(dash_mpd)
1664
1665 is_live = None
1666 view_count = None
1667
1668 def extract_view_count(v_info):
1669 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1670
1671 def extract_player_response(player_response, video_id):
1672 pl_response = str_or_none(player_response)
1673 if not pl_response:
1674 return
1675 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1676 if isinstance(pl_response, dict):
1677 add_dash_mpd_pr(pl_response)
1678 return pl_response
1679
1680 def extract_embedded_config(embed_webpage, video_id):
1681 embedded_config = self._search_regex(
1682 r'setConfig\(({.*})\);',
1683 embed_webpage, 'ytInitialData', default=None)
1684 if embedded_config:
1685 return embedded_config
1686
1687 player_response = {}
1688
1689 # Get video info
1690 video_info = {}
1691 embed_webpage = None
1692 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1693 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1694 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1695 age_gate = True
1696 # We simulate the access to the video from www.youtube.com/v/{video_id}
1697 # this can be viewed without login into Youtube
1698 url = proto + '://www.youtube.com/embed/%s' % video_id
1699 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1700 ext = extract_embedded_config(embed_webpage, video_id)
1701 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1702 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1703 if not playable_in_embed:
1704 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1705 playable_in_embed = ''
1706 else:
1707 playable_in_embed = playable_in_embed.group('playableinEmbed')
1708 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1709 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1710 if playable_in_embed == 'false':
1711 '''
1712 # TODO apply this patch when Support for Python 2.6(!) and above drops
1713 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1714 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1715 '''
1716 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1717 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1718 age_gate = False
1719 # Try looking directly into the video webpage
1720 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1721 if ytplayer_config:
1722 args = ytplayer_config.get("args")
1723 if args is not None:
1724 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1725 # Convert to the same format returned by compat_parse_qs
1726 video_info = dict((k, [v]) for k, v in args.items())
1727 add_dash_mpd(video_info)
1728 # Rental video is not rented but preview is available (e.g.
1729 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1730 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1731 if not video_info and args.get('ypc_vid'):
1732 return self.url_result(
1733 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1734 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1735 is_live = True
1736 if not player_response:
1737 player_response = extract_player_response(args.get('player_response'), video_id)
1738 elif not player_response:
1739 player_response = ytplayer_config
1740 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1741 add_dash_mpd_pr(player_response)
1742 else:
1743 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1744 else:
1745 data = compat_urllib_parse_urlencode({
1746 'video_id': video_id,
1747 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1748 'sts': self._search_regex(
1749 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1750 })
1751 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1752 try:
1753 video_info_webpage = self._download_webpage(
1754 video_info_url, video_id,
1755 note='Refetching age-gated info webpage',
1756 errnote='unable to download video info webpage')
1757 except ExtractorError:
1758 video_info_webpage = None
1759 if video_info_webpage:
1760 video_info = compat_parse_qs(video_info_webpage)
1761 pl_response = video_info.get('player_response', [None])[0]
1762 player_response = extract_player_response(pl_response, video_id)
1763 add_dash_mpd(video_info)
1764 view_count = extract_view_count(video_info)
1765 else:
1766 age_gate = False
1767 # Try looking directly into the video webpage
1768 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1769 if ytplayer_config:
1770 args = ytplayer_config.get('args', {})
1771 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1772 # Convert to the same format returned by compat_parse_qs
1773 video_info = dict((k, [v]) for k, v in args.items())
1774 add_dash_mpd(video_info)
1775 # Rental video is not rented but preview is available (e.g.
1776 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1777 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1778 if not video_info and args.get('ypc_vid'):
1779 return self.url_result(
1780 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1781 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1782 is_live = True
1783 if not player_response:
1784 player_response = extract_player_response(args.get('player_response'), video_id)
1785 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1786 add_dash_mpd_pr(player_response)
1787
1788 if not video_info and not player_response:
1789 player_response = extract_player_response(
1790 self._search_regex(
1791 r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
1792 'initial player response', default='{}'),
1793 video_id)
1794
1795 def extract_unavailable_message():
1796 messages = []
1797 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1798 msg = self._html_search_regex(
1799 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1800 video_webpage, 'unavailable %s' % kind, default=None)
1801 if msg:
1802 messages.append(msg)
1803 if messages:
1804 return '\n'.join(messages)
1805
1806 if not video_info and not player_response:
1807 unavailable_message = extract_unavailable_message()
1808 if not unavailable_message:
1809 unavailable_message = 'Unable to extract video data'
1810 raise ExtractorError(
1811 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1812
1813 if not isinstance(video_info, dict):
1814 video_info = {}
1815
1816 video_details = try_get(
1817 player_response, lambda x: x['videoDetails'], dict) or {}
1818
1819 microformat = try_get(
1820 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1821
1822 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1823 if not video_title:
1824 self._downloader.report_warning('Unable to extract video title')
1825 video_title = '_'
1826
1827 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1828 if video_description:
1829
1830 def replace_url(m):
1831 redir_url = compat_urlparse.urljoin(url, m.group(1))
1832 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1833 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1834 qs = compat_parse_qs(parsed_redir_url.query)
1835 q = qs.get('q')
1836 if q and q[0]:
1837 return q[0]
1838 return redir_url
1839
1840 description_original = video_description = re.sub(r'''(?x)
1841 <a\s+
1842 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1843 (?:title|href)="([^"]+)"\s+
1844 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1845 class="[^"]*"[^>]*>
1846 [^<]+\.{3}\s*
1847 </a>
1848 ''', replace_url, video_description)
1849 video_description = clean_html(video_description)
1850 else:
1851 video_description = video_details.get('shortDescription')
1852 if video_description is None:
1853 video_description = self._html_search_meta('description', video_webpage)
1854
1855 if not smuggled_data.get('force_singlefeed', False):
1856 if not self._downloader.params.get('noplaylist'):
1857 multifeed_metadata_list = try_get(
1858 player_response,
1859 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1860 compat_str) or try_get(
1861 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1862 if multifeed_metadata_list:
1863 entries = []
1864 feed_ids = []
1865 for feed in multifeed_metadata_list.split(','):
1866 # Unquote should take place before split on comma (,) since textual
1867 # fields may contain comma as well (see
1868 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1869 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1870
1871 def feed_entry(name):
1872 return try_get(feed_data, lambda x: x[name][0], compat_str)
1873
1874 feed_id = feed_entry('id')
1875 if not feed_id:
1876 continue
1877 feed_title = feed_entry('title')
1878 title = video_title
1879 if feed_title:
1880 title += ' (%s)' % feed_title
1881 entries.append({
1882 '_type': 'url_transparent',
1883 'ie_key': 'Youtube',
1884 'url': smuggle_url(
1885 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1886 {'force_singlefeed': True}),
1887 'title': title,
1888 })
1889 feed_ids.append(feed_id)
1890 self.to_screen(
1891 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1892 % (', '.join(feed_ids), video_id))
1893 return self.playlist_result(entries, video_id, video_title, video_description)
1894 else:
1895 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1896
1897 if view_count is None:
1898 view_count = extract_view_count(video_info)
1899 if view_count is None and video_details:
1900 view_count = int_or_none(video_details.get('viewCount'))
1901 if view_count is None and microformat:
1902 view_count = int_or_none(microformat.get('viewCount'))
1903
1904 if is_live is None:
1905 is_live = bool_or_none(video_details.get('isLive'))
1906
1907 has_live_chat_replay = False
1908 if not is_live:
1909 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1910 try:
1911 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1912 has_live_chat_replay = True
1913 except (KeyError, IndexError, TypeError):
1914 pass
1915
1916 # Check for "rental" videos
1917 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1918 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1919
1920 def _extract_filesize(media_url):
1921 return int_or_none(self._search_regex(
1922 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1923
1924 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1925 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1926
1927 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1928 self.report_rtmp_download()
1929 formats = [{
1930 'format_id': '_rtmp',
1931 'protocol': 'rtmp',
1932 'url': video_info['conn'][0],
1933 'player_url': player_url,
1934 }]
1935 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1936 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1937 if 'rtmpe%3Dyes' in encoded_url_map:
1938 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1939 formats = []
1940 formats_spec = {}
1941 fmt_list = video_info.get('fmt_list', [''])[0]
1942 if fmt_list:
1943 for fmt in fmt_list.split(','):
1944 spec = fmt.split('/')
1945 if len(spec) > 1:
1946 width_height = spec[1].split('x')
1947 if len(width_height) == 2:
1948 formats_spec[spec[0]] = {
1949 'resolution': spec[1],
1950 'width': int_or_none(width_height[0]),
1951 'height': int_or_none(width_height[1]),
1952 }
1953 for fmt in streaming_formats:
1954 itag = str_or_none(fmt.get('itag'))
1955 if not itag:
1956 continue
1957 quality = fmt.get('quality')
1958 quality_label = fmt.get('qualityLabel') or quality
1959 formats_spec[itag] = {
1960 'asr': int_or_none(fmt.get('audioSampleRate')),
1961 'filesize': int_or_none(fmt.get('contentLength')),
1962 'format_note': quality_label,
1963 'fps': int_or_none(fmt.get('fps')),
1964 'height': int_or_none(fmt.get('height')),
1965 # bitrate for itag 43 is always 2147483647
1966 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1967 'width': int_or_none(fmt.get('width')),
1968 }
1969
1970 for fmt in streaming_formats:
1971 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1972 continue
1973 url = url_or_none(fmt.get('url'))
1974
1975 if not url:
1976 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1977 if not cipher:
1978 continue
1979 url_data = compat_parse_qs(cipher)
1980 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1981 if not url:
1982 continue
1983 else:
1984 cipher = None
1985 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1986
1987 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1988 # Unsupported FORMAT_STREAM_TYPE_OTF
1989 if stream_type == 3:
1990 continue
1991
1992 format_id = fmt.get('itag') or url_data['itag'][0]
1993 if not format_id:
1994 continue
1995 format_id = compat_str(format_id)
1996
1997 if cipher:
1998 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1999 ASSETS_RE = (
2000 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2001 r'"jsUrl"\s*:\s*("[^"]+")',
2002 r'"assets":.+?"js":\s*("[^"]+")')
2003 jsplayer_url_json = self._search_regex(
2004 ASSETS_RE,
2005 embed_webpage if age_gate else video_webpage,
2006 'JS player URL (1)', default=None)
2007 if not jsplayer_url_json and not age_gate:
2008 # We need the embed website after all
2009 if embed_webpage is None:
2010 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2011 embed_webpage = self._download_webpage(
2012 embed_url, video_id, 'Downloading embed webpage')
2013 jsplayer_url_json = self._search_regex(
2014 ASSETS_RE, embed_webpage, 'JS player URL')
2015
2016 player_url = json.loads(jsplayer_url_json)
2017 if player_url is None:
2018 player_url_json = self._search_regex(
2019 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2020 video_webpage, 'age gate player URL')
2021 player_url = json.loads(player_url_json)
2022
2023 if 'sig' in url_data:
2024 url += '&signature=' + url_data['sig'][0]
2025 elif 's' in url_data:
2026 encrypted_sig = url_data['s'][0]
2027
2028 if self._downloader.params.get('verbose'):
2029 if player_url is None:
2030 player_desc = 'unknown'
2031 else:
2032 player_type, player_version = self._extract_player_info(player_url)
2033 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2034 parts_sizes = self._signature_cache_id(encrypted_sig)
2035 self.to_screen('{%s} signature length %s, %s' %
2036 (format_id, parts_sizes, player_desc))
2037
2038 signature = self._decrypt_signature(
2039 encrypted_sig, video_id, player_url, age_gate)
2040 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2041 url += '&%s=%s' % (sp, signature)
2042 if 'ratebypass' not in url:
2043 url += '&ratebypass=yes'
2044
2045 dct = {
2046 'format_id': format_id,
2047 'url': url,
2048 'player_url': player_url,
2049 }
2050 if format_id in self._formats:
2051 dct.update(self._formats[format_id])
2052 if format_id in formats_spec:
2053 dct.update(formats_spec[format_id])
2054
2055 # Some itags are not included in DASH manifest thus corresponding formats will
2056 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2057 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2058 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2059 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2060
2061 if width is None:
2062 width = int_or_none(fmt.get('width'))
2063 if height is None:
2064 height = int_or_none(fmt.get('height'))
2065
2066 filesize = int_or_none(url_data.get(
2067 'clen', [None])[0]) or _extract_filesize(url)
2068
2069 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2070 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2071
2072 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2073 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2074 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2075
2076 more_fields = {
2077 'filesize': filesize,
2078 'tbr': tbr,
2079 'width': width,
2080 'height': height,
2081 'fps': fps,
2082 'format_note': quality_label or quality,
2083 }
2084 for key, value in more_fields.items():
2085 if value:
2086 dct[key] = value
2087 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2088 if type_:
2089 type_split = type_.split(';')
2090 kind_ext = type_split[0].split('/')
2091 if len(kind_ext) == 2:
2092 kind, _ = kind_ext
2093 dct['ext'] = mimetype2ext(type_split[0])
2094 if kind in ('audio', 'video'):
2095 codecs = None
2096 for mobj in re.finditer(
2097 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2098 if mobj.group('key') == 'codecs':
2099 codecs = mobj.group('val')
2100 break
2101 if codecs:
2102 dct.update(parse_codecs(codecs))
2103 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2104 dct['downloader_options'] = {
2105 # Youtube throttles chunks >~10M
2106 'http_chunk_size': 10485760,
2107 }
2108 formats.append(dct)
2109 else:
2110 manifest_url = (
2111 url_or_none(try_get(
2112 player_response,
2113 lambda x: x['streamingData']['hlsManifestUrl'],
2114 compat_str))
2115 or url_or_none(try_get(
2116 video_info, lambda x: x['hlsvp'][0], compat_str)))
2117 if manifest_url:
2118 formats = []
2119 m3u8_formats = self._extract_m3u8_formats(
2120 manifest_url, video_id, 'mp4', fatal=False)
2121 for a_format in m3u8_formats:
2122 itag = self._search_regex(
2123 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2124 if itag:
2125 a_format['format_id'] = itag
2126 if itag in self._formats:
2127 dct = self._formats[itag].copy()
2128 dct.update(a_format)
2129 a_format = dct
2130 a_format['player_url'] = player_url
2131 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2132 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2133 if self._downloader.params.get('youtube_include_hls_manifest', True):
2134 formats.append(a_format)
2135 else:
2136 error_message = extract_unavailable_message()
2137 if not error_message:
2138 error_message = clean_html(try_get(
2139 player_response, lambda x: x['playabilityStatus']['reason'],
2140 compat_str))
2141 if not error_message:
2142 error_message = clean_html(
2143 try_get(video_info, lambda x: x['reason'][0], compat_str))
2144 if error_message:
2145 raise ExtractorError(error_message, expected=True)
2146 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2147
2148 # uploader
2149 video_uploader = try_get(
2150 video_info, lambda x: x['author'][0],
2151 compat_str) or str_or_none(video_details.get('author'))
2152 if video_uploader:
2153 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2154 else:
2155 self._downloader.report_warning('unable to extract uploader name')
2156
2157 # uploader_id
2158 video_uploader_id = None
2159 video_uploader_url = None
2160 mobj = re.search(
2161 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2162 video_webpage)
2163 if mobj is not None:
2164 video_uploader_id = mobj.group('uploader_id')
2165 video_uploader_url = mobj.group('uploader_url')
2166 else:
2167 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2168 if owner_profile_url:
2169 video_uploader_id = self._search_regex(
2170 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2171 default=None)
2172 video_uploader_url = owner_profile_url
2173
2174 channel_id = (
2175 str_or_none(video_details.get('channelId'))
2176 or self._html_search_meta(
2177 'channelId', video_webpage, 'channel id', default=None)
2178 or self._search_regex(
2179 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2180 video_webpage, 'channel id', default=None, group='id'))
2181 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2182
2183 thumbnails = []
2184 thumbnails_list = try_get(
2185 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2186 for t in thumbnails_list:
2187 if not isinstance(t, dict):
2188 continue
2189 thumbnail_url = url_or_none(t.get('url'))
2190 if not thumbnail_url:
2191 continue
2192 thumbnails.append({
2193 'url': thumbnail_url,
2194 'width': int_or_none(t.get('width')),
2195 'height': int_or_none(t.get('height')),
2196 })
2197
2198 if not thumbnails:
2199 video_thumbnail = None
2200 # We try first to get a high quality image:
2201 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2202 video_webpage, re.DOTALL)
2203 if m_thumb is not None:
2204 video_thumbnail = m_thumb.group(1)
2205 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2206 if thumbnail_url:
2207 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2208 if video_thumbnail:
2209 thumbnails.append({'url': video_thumbnail})
2210
2211 # upload date
2212 upload_date = self._html_search_meta(
2213 'datePublished', video_webpage, 'upload date', default=None)
2214 if not upload_date:
2215 upload_date = self._search_regex(
2216 [r'(?s)id="eow-date.*?>(.*?)</span>',
2217 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2218 video_webpage, 'upload date', default=None)
2219 if not upload_date:
2220 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2221 upload_date = unified_strdate(upload_date)
2222
2223 video_license = self._html_search_regex(
2224 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2225 video_webpage, 'license', default=None)
2226
2227 m_music = re.search(
2228 r'''(?x)
2229 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2230 <ul[^>]*>\s*
2231 <li>(?P<title>.+?)
2232 by (?P<creator>.+?)
2233 (?:
2234 \(.+?\)|
2235 <a[^>]*
2236 (?:
2237 \bhref=["\']/red[^>]*>| # drop possible
2238 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2239 )
2240 .*?
2241 )?</li
2242 ''',
2243 video_webpage)
2244 if m_music:
2245 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2246 video_creator = clean_html(m_music.group('creator'))
2247 else:
2248 video_alt_title = video_creator = None
2249
2250 def extract_meta(field):
2251 return self._html_search_regex(
2252 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2253 video_webpage, field, default=None)
2254
2255 track = extract_meta('Song')
2256 artist = extract_meta('Artist')
2257 album = extract_meta('Album')
2258
2259 # Youtube Music Auto-generated description
2260 release_date = release_year = None
2261 if video_description:
2262 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2263 if mobj:
2264 if not track:
2265 track = mobj.group('track').strip()
2266 if not artist:
2267 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2268 if not album:
2269 album = mobj.group('album'.strip())
2270 release_year = mobj.group('release_year')
2271 release_date = mobj.group('release_date')
2272 if release_date:
2273 release_date = release_date.replace('-', '')
2274 if not release_year:
2275 release_year = int(release_date[:4])
2276 if release_year:
2277 release_year = int(release_year)
2278
2279 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2280 if yt_initial:
2281 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2282 if len(music_metadata):
2283 album = music_metadata[0].get('album')
2284 artist = music_metadata[0].get('artist')
2285 track = music_metadata[0].get('track')
2286
2287 m_episode = re.search(
2288 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2289 video_webpage)
2290 if m_episode:
2291 series = unescapeHTML(m_episode.group('series'))
2292 season_number = int(m_episode.group('season'))
2293 episode_number = int(m_episode.group('episode'))
2294 else:
2295 series = season_number = episode_number = None
2296
2297 m_cat_container = self._search_regex(
2298 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2299 video_webpage, 'categories', default=None)
2300 category = None
2301 if m_cat_container:
2302 category = self._html_search_regex(
2303 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2304 default=None)
2305 if not category:
2306 category = try_get(
2307 microformat, lambda x: x['category'], compat_str)
2308 video_categories = None if category is None else [category]
2309
2310 video_tags = [
2311 unescapeHTML(m.group('content'))
2312 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2313 if not video_tags:
2314 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2315
2316 def _extract_count(count_name):
2317 return str_to_int(self._search_regex(
2318 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2319 % re.escape(count_name),
2320 video_webpage, count_name, default=None))
2321
2322 like_count = _extract_count('like')
2323 dislike_count = _extract_count('dislike')
2324
2325 if view_count is None:
2326 view_count = str_to_int(self._search_regex(
2327 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2328 'view count', default=None))
2329
2330 average_rating = (
2331 float_or_none(video_details.get('averageRating'))
2332 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2333
2334 # subtitles
2335 video_subtitles = self.extract_subtitles(
2336 video_id, video_webpage, has_live_chat_replay)
2337 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2338
2339 video_duration = try_get(
2340 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2341 if not video_duration:
2342 video_duration = int_or_none(video_details.get('lengthSeconds'))
2343 if not video_duration:
2344 video_duration = parse_duration(self._html_search_meta(
2345 'duration', video_webpage, 'video duration'))
2346
2347 # Get Subscriber Count of channel
2348 subscriber_count = parse_count(self._search_regex(
2349 r'"text":"([\d\.]+\w?) subscribers"',
2350 video_webpage,
2351 'subscriber count',
2352 default=None
2353 ))
2354
2355 # annotations
2356 video_annotations = None
2357 if self._downloader.params.get('writeannotations', False):
2358 xsrf_token = self._search_regex(
2359 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2360 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2361 invideo_url = try_get(
2362 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2363 if xsrf_token and invideo_url:
2364 xsrf_field_name = self._search_regex(
2365 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2366 video_webpage, 'xsrf field name',
2367 group='xsrf_field_name', default='session_token')
2368 video_annotations = self._download_webpage(
2369 self._proto_relative_url(invideo_url),
2370 video_id, note='Downloading annotations',
2371 errnote='Unable to download video annotations', fatal=False,
2372 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2373
2374 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2375
2376 # Look for the DASH manifest
2377 if self._downloader.params.get('youtube_include_dash_manifest', True):
2378 dash_mpd_fatal = True
2379 for mpd_url in dash_mpds:
2380 dash_formats = {}
2381 try:
2382 def decrypt_sig(mobj):
2383 s = mobj.group(1)
2384 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2385 return '/signature/%s' % dec_s
2386
2387 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2388
2389 for df in self._extract_mpd_formats(
2390 mpd_url, video_id, fatal=dash_mpd_fatal,
2391 formats_dict=self._formats):
2392 if not df.get('filesize'):
2393 df['filesize'] = _extract_filesize(df['url'])
2394 # Do not overwrite DASH format found in some previous DASH manifest
2395 if df['format_id'] not in dash_formats:
2396 dash_formats[df['format_id']] = df
2397 # Additional DASH manifests may end up in HTTP Error 403 therefore
2398 # allow them to fail without bug report message if we already have
2399 # some DASH manifest succeeded. This is temporary workaround to reduce
2400 # burst of bug reports until we figure out the reason and whether it
2401 # can be fixed at all.
2402 dash_mpd_fatal = False
2403 except (ExtractorError, KeyError) as e:
2404 self.report_warning(
2405 'Skipping DASH manifest: %r' % e, video_id)
2406 if dash_formats:
2407 # Remove the formats we found through non-DASH, they
2408 # contain less info and it can be wrong, because we use
2409 # fixed values (for example the resolution). See
2410 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2411 # example.
2412 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2413 formats.extend(dash_formats.values())
2414
2415 # Check for malformed aspect ratio
2416 stretched_m = re.search(
2417 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2418 video_webpage)
2419 if stretched_m:
2420 w = float(stretched_m.group('w'))
2421 h = float(stretched_m.group('h'))
2422 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2423 # We will only process correct ratios.
2424 if w > 0 and h > 0:
2425 ratio = w / h
2426 for f in formats:
2427 if f.get('vcodec') != 'none':
2428 f['stretched_ratio'] = ratio
2429
2430 if not formats:
2431 if 'reason' in video_info:
2432 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2433 regions_allowed = self._html_search_meta(
2434 'regionsAllowed', video_webpage, default=None)
2435 countries = regions_allowed.split(',') if regions_allowed else None
2436 self.raise_geo_restricted(
2437 msg=video_info['reason'][0], countries=countries)
2438 reason = video_info['reason'][0]
2439 if 'Invalid parameters' in reason:
2440 unavailable_message = extract_unavailable_message()
2441 if unavailable_message:
2442 reason = unavailable_message
2443 raise ExtractorError(
2444 'YouTube said: %s' % reason,
2445 expected=True, video_id=video_id)
2446 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2447 raise ExtractorError('This video is DRM protected.', expected=True)
2448
2449 self._sort_formats(formats)
2450
2451 self.mark_watched(video_id, video_info, player_response)
2452
2453 return {
2454 'id': video_id,
2455 'uploader': video_uploader,
2456 'uploader_id': video_uploader_id,
2457 'uploader_url': video_uploader_url,
2458 'channel_id': channel_id,
2459 'channel_url': channel_url,
2460 'upload_date': upload_date,
2461 'license': video_license,
2462 'creator': video_creator or artist,
2463 'title': video_title,
2464 'alt_title': video_alt_title or track,
2465 'thumbnails': thumbnails,
2466 'description': video_description,
2467 'categories': video_categories,
2468 'tags': video_tags,
2469 'subtitles': video_subtitles,
2470 'automatic_captions': automatic_captions,
2471 'duration': video_duration,
2472 'age_limit': 18 if age_gate else 0,
2473 'annotations': video_annotations,
2474 'chapters': chapters,
2475 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2476 'view_count': view_count,
2477 'like_count': like_count,
2478 'dislike_count': dislike_count,
2479 'average_rating': average_rating,
2480 'formats': formats,
2481 'is_live': is_live,
2482 'start_time': start_time,
2483 'end_time': end_time,
2484 'series': series,
2485 'season_number': season_number,
2486 'episode_number': episode_number,
2487 'track': track,
2488 'artist': artist,
2489 'album': album,
2490 'release_date': release_date,
2491 'release_year': release_year,
2492 'subscriber_count': subscriber_count,
2493 }
2494
2495
2496 class YoutubeTabIE(YoutubeBaseInfoExtractor):
2497 IE_DESC = 'YouTube.com tab'
2498 _VALID_URL = r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/(?:(?:channel|c|user)/|(?:playlist|watch)\?.*?\blist=)(?P<id>[^/?#&]+)'
2499 IE_NAME = 'youtube:tab'
2500
2501 _TESTS = [{
2502 # playlists, multipage
2503 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2504 'playlist_mincount': 94,
2505 'info_dict': {
2506 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2507 'title': 'Игорь Клейнер - Playlists',
2508 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2509 },
2510 }, {
2511 # playlists, multipage, different order
2512 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2513 'playlist_mincount': 94,
2514 'info_dict': {
2515 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2516 'title': 'Игорь Клейнер - Playlists',
2517 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2518 },
2519 }, {
2520 # playlists, singlepage
2521 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2522 'playlist_mincount': 4,
2523 'info_dict': {
2524 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2525 'title': 'ThirstForScience - Playlists',
2526 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2527 }
2528 }, {
2529 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2530 'only_matching': True,
2531 }, {
2532 # basic, single video playlist
2533 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2534 'info_dict': {
2535 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2536 'uploader': 'Sergey M.',
2537 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2538 'title': 'youtube-dl public playlist',
2539 },
2540 'playlist_count': 1,
2541 }, {
2542 # empty playlist
2543 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2544 'info_dict': {
2545 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2546 'uploader': 'Sergey M.',
2547 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2548 'title': 'youtube-dl empty playlist',
2549 },
2550 'playlist_count': 0,
2551 }, {
2552 # Home tab
2553 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2554 'info_dict': {
2555 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2556 'title': 'lex will - Home',
2557 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2558 },
2559 'playlist_mincount': 2,
2560 }, {
2561 # Videos tab
2562 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2563 'info_dict': {
2564 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2565 'title': 'lex will - Videos',
2566 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2567 },
2568 'playlist_mincount': 975,
2569 }, {
2570 # Videos tab, sorted by popular
2571 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2572 'info_dict': {
2573 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2574 'title': 'lex will - Videos',
2575 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2576 },
2577 'playlist_mincount': 199,
2578 }, {
2579 # Playlists tab
2580 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2581 'info_dict': {
2582 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2583 'title': 'lex will - Playlists',
2584 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2585 },
2586 'playlist_mincount': 17,
2587 }, {
2588 # Community tab
2589 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2590 'info_dict': {
2591 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2592 'title': 'lex will - Community',
2593 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2594 },
2595 'playlist_mincount': 18,
2596 }, {
2597 # Channels tab
2598 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2599 'info_dict': {
2600 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2601 'title': 'lex will - Channels',
2602 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2603 },
2604 'playlist_mincount': 138,
2605 }, {
2606 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2607 'only_matching': True,
2608 }, {
2609 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2610 'only_matching': True,
2611 }, {
2612 'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ',
2613 'only_matching': True,
2614 }, {
2615 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2616 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2617 'info_dict': {
2618 'title': '29C3: Not my department',
2619 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2620 'uploader': 'Christiaan008',
2621 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2622 },
2623 'playlist_count': 96,
2624 }, {
2625 'note': 'Large playlist',
2626 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2627 'info_dict': {
2628 'title': 'Uploads from Cauchemar',
2629 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2630 'uploader': 'Cauchemar',
2631 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2632 },
2633 'playlist_mincount': 1123,
2634 }, {
2635 # even larger playlist, 8832 videos
2636 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2637 'only_matching': True,
2638 }, {
2639 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2640 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2641 'info_dict': {
2642 'title': 'Uploads from Interstellar Movie',
2643 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2644 'uploader': 'Interstellar Movie',
2645 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2646 },
2647 'playlist_mincount': 21,
2648 }, {
2649 # https://github.com/ytdl-org/youtube-dl/issues/21844
2650 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2651 'info_dict': {
2652 'title': 'Data Analysis with Dr Mike Pound',
2653 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2654 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2655 'uploader': 'Computerphile',
2656 },
2657 'playlist_mincount': 11,
2658 }, {
2659 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2660 'only_matching': True,
2661 }, {
2662 # Playlist URL that does not actually serve a playlist
2663 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2664 'info_dict': {
2665 'id': 'FqZTN594JQw',
2666 'ext': 'webm',
2667 'title': "Smiley's People 01 detective, Adventure Series, Action",
2668 'uploader': 'STREEM',
2669 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2671 'upload_date': '20150526',
2672 'license': 'Standard YouTube License',
2673 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2674 'categories': ['People & Blogs'],
2675 'tags': list,
2676 'view_count': int,
2677 'like_count': int,
2678 'dislike_count': int,
2679 },
2680 'params': {
2681 'skip_download': True,
2682 },
2683 'skip': 'This video is not available.',
2684 'add_ie': [YoutubeIE.ie_key()],
2685 }, {
2686 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2687 'only_matching': True,
2688 }, {
2689 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2690 'only_matching': True,
2691 }]
2692
2693 @classmethod
2694 def suitable(cls, url):
2695 return False if YoutubeLiveIE.suitable(url) else super(
2696 YoutubeTabIE, cls).suitable(url)
2697
2698 def _extract_channel_id(self, webpage):
2699 channel_id = self._html_search_meta(
2700 'channelId', webpage, 'channel id', default=None)
2701 if channel_id:
2702 return channel_id
2703 channel_url = self._html_search_meta(
2704 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2705 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2706 'twitter:app:url:googleplay'), webpage, 'channel url')
2707 return self._search_regex(
2708 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2709 channel_url, 'channel id')
2710
2711 @staticmethod
2712 def _extract_grid_item_renderer(item):
2713 for item_kind in ('Playlist', 'Video', 'Channel'):
2714 renderer = item.get('grid%sRenderer' % item_kind)
2715 if renderer:
2716 return renderer
2717
2718 def _extract_video(self, renderer):
2719 video_id = renderer.get('videoId')
2720 title = try_get(
2721 renderer,
2722 (lambda x: x['title']['runs'][0]['text'],
2723 lambda x: x['title']['simpleText']), compat_str)
2724 description = try_get(
2725 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2726 compat_str)
2727 duration = parse_duration(try_get(
2728 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2729 view_count_text = try_get(
2730 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2731 view_count = str_to_int(self._search_regex(
2732 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2733 'view count', default=None))
2734 uploader = try_get(
2735 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2736 return {
2737 '_type': 'url_transparent',
2738 'ie_key': YoutubeIE.ie_key(),
2739 'id': video_id,
2740 'url': video_id,
2741 'title': title,
2742 'description': description,
2743 'duration': duration,
2744 'view_count': view_count,
2745 'uploader': uploader,
2746 }
2747
    def _grid_entries(self, grid_renderer):
        # Yield playlist, video and channel entries from a gridRenderer's
        # items. An item may match more than one of the id checks below, in
        # which case multiple entries are yielded for it.
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_grid_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = try_get(
                renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                # channel titles use 'simpleText' rather than 'runs'
                title = try_get(
                    renderer, lambda x: x['title']['simpleText'], compat_str)
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
2777 def _shelf_entries_trimmed(self, shelf_renderer):
2778 renderer = try_get(
2779 shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
2780 if not renderer:
2781 return
2782 # TODO: add support for nested playlists so each shelf is processed
2783 # as separate playlist
2784 # TODO: this includes only first N items
2785 for entry in self._grid_entries(renderer):
2786 yield entry
2787
2788 def _shelf_entries(self, shelf_renderer):
2789 ep = try_get(
2790 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2791 compat_str)
2792 shelf_url = urljoin('https://www.youtube.com', ep)
2793 if not shelf_url:
2794 return
2795 title = try_get(
2796 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2797 yield self.url_result(shelf_url, video_title=title)
2798
2799 def _playlist_entries(self, video_list_renderer):
2800 for content in video_list_renderer['contents']:
2801 if not isinstance(content, dict):
2802 continue
2803 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2804 if not isinstance(renderer, dict):
2805 continue
2806 video_id = renderer.get('videoId')
2807 if not video_id:
2808 continue
2809 yield self._extract_video(renderer)
2810
2811 def _video_entry(self, video_renderer):
2812 video_id = video_renderer.get('videoId')
2813 if video_id:
2814 return self._extract_video(video_renderer)
2815
2816 def _post_thread_entries(self, post_thread_renderer):
2817 post_renderer = try_get(
2818 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2819 if not post_renderer:
2820 return
2821 # video attachment
2822 video_renderer = try_get(
2823 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2824 video_id = None
2825 if video_renderer:
2826 entry = self._video_entry(video_renderer)
2827 if entry:
2828 yield entry
2829 # inline video links
2830 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2831 for run in runs:
2832 if not isinstance(run, dict):
2833 continue
2834 ep_url = try_get(
2835 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2836 if not ep_url:
2837 continue
2838 if not YoutubeIE.suitable(ep_url):
2839 continue
2840 ep_video_id = YoutubeIE._match_id(ep_url)
2841 if video_id == ep_video_id:
2842 continue
2843 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2844
2845 def _post_thread_continuation_entries(self, post_thread_continuation):
2846 contents = post_thread_continuation.get('contents')
2847 if not isinstance(contents, list):
2848 return
2849 for content in contents:
2850 renderer = content.get('backstagePostThreadRenderer')
2851 if not isinstance(renderer, dict):
2852 continue
2853 for entry in self._post_thread_entries(renderer):
2854 yield entry
2855
2856 @staticmethod
2857 def _extract_next_continuation_data(renderer):
2858 next_continuation = try_get(
2859 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2860 if not next_continuation:
2861 return
2862 continuation = next_continuation.get('continuation')
2863 if not continuation:
2864 return
2865 ctp = next_continuation.get('clickTrackingParams')
2866 return {
2867 'ctoken': continuation,
2868 'continuation': continuation,
2869 'itct': ctp,
2870 }
2871
    @classmethod
    def _extract_continuation(cls, renderer):
        # Prefer the legacy nextContinuationData format; otherwise scan the
        # renderer's contents for a continuationItemRenderer endpoint.
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation
        contents = renderer.get('contents')
        if not isinstance(contents, list):
            return
        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                dict)
            if not continuation_ep:
                continue
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                continue
            ctp = continuation_ep.get('clickTrackingParams')
            if not ctp:
                continue
            # Same shape as _extract_next_continuation_data's return value.
            return {
                'ctoken': continuation,
                'continuation': continuation,
                'itct': ctp,
            }
2900
    def _entries(self, tab, identity_token):
        # Yield all entries of a tab's sectionListRenderer, then follow
        # continuation pages via the browse_ajax endpoint.  identity_token,
        # when present, is sent as x-youtube-identity-token so that
        # account-specific content is visible.
        continuation = None
        slr_contents = try_get(tab, lambda x: x['sectionListRenderer']['contents'], list) or []
        for slr_content in slr_contents:
            if not isinstance(slr_content, dict):
                continue
            is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                renderer = isr_content.get('playlistVideoListRenderer')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('gridRenderer')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('shelfRenderer')
                if renderer:
                    for entry in self._shelf_entries(renderer):
                        yield entry
                    continue
                renderer = isr_content.get('backstagePostThreadRenderer')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get('videoRenderer')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry

            # fall back to a section-level continuation
            if not continuation:
                continuation = self._extract_continuation(is_renderer)

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        for page_num in itertools.count(1):
            if not continuation:
                break
            browse = self._download_json(
                'https://www.youtube.com/browse_ajax', None,
                'Downloading page %d' % page_num,
                headers=headers, query=continuation, fatal=False)
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # old-style continuation payload
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue

            # new-style continuation payload
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): `continuation` is left unchanged on this
                    # path, so the same page would be requested again —
                    # confirm whether this case can occur in practice.
                    continue
                renderer = continuation_item.get('playlistVideoRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue

            break
3003
3004 @staticmethod
3005 def _extract_selected_tab(tabs):
3006 for tab in tabs:
3007 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3008 return tab['tabRenderer']
3009 else:
3010 raise ExtractorError('Unable to find selected tab')
3011
    @staticmethod
    def _extract_uploader(data):
        # Pull uploader name/id/url out of the playlist sidebar's secondary
        # info renderer; returns a (possibly empty) dict of those fields.
        uploader = {}
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if sidebar_renderer:
            for item in sidebar_renderer:
                if not isinstance(item, dict):
                    continue
                renderer = item.get('playlistSidebarSecondaryInfoRenderer')
                if not isinstance(renderer, dict):
                    continue
                owner = try_get(
                    renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
                if owner:
                    uploader['uploader'] = owner.get('text')
                    uploader['uploader_id'] = try_get(
                        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
                    uploader['uploader_url'] = urljoin(
                        'https://www.youtube.com/',
                        try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
        return uploader
3034
    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        # Build a playlist result for the selected tab of a channel/playlist
        # page.  Channel metadata supplies title/description/id first; the
        # playlist metadata renderer, when present, overrides them.
        # NOTE(review): if neither metadata renderer is present, title,
        # description and playlist_id are referenced unbound below — confirm
        # whether that state is reachable.
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            title = channel_title or item_id
            if tab_title:
                # e.g. 'lex will - Videos'
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
        if renderer:
            title = renderer.get('title')
            description = None
            playlist_id = item_id
        playlist = self.playlist_result(
            self._entries(selected_tab['content'], identity_token),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        # merge uploader fields extracted from the sidebar
        playlist.update(self._extract_uploader(data))
        return playlist
3059
3060 def _extract_from_playlist(self, item_id, data, playlist):
3061 title = playlist.get('title') or try_get(
3062 data, lambda x: x['titleText']['simpleText'], compat_str)
3063 playlist_id = playlist.get('playlistId') or item_id
3064 return self.playlist_result(
3065 self._playlist_entries(playlist), playlist_id=playlist_id,
3066 playlist_title=title)
3067
    def _real_extract(self, url):
        # Dispatch a tab/playlist/watch URL to the right extraction path.
        item_id = self._match_id(url)
        # normalize host (music.youtube.com, youtubekids.com, ...) to www
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(item_id, webpage)
        # channel/playlist pages expose a two-column browse layout with tabs
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        # watch pages with a playlist panel
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3099
3100
class YoutubePlaylistIE(InfoExtractor):
    """Thin wrapper that redirects playlist URLs/ids to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us|
                                youtu\.be
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Let YoutubeTabIE handle full playlist URLs; this IE only takes
        # what is left (bare ids and youtu.be-style links).
        return False if YoutubeTabIE.suitable(url) else super(
            YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not qs:
            # bare playlist id (no query string) — synthesize the list param
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3199
3200
class YoutubeYtUserIE(InfoExtractor):
    """Resolve ytuser:<name> pseudo-URLs to the corresponding user page."""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3213
3214
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's /live URL to its current live stream video."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # a currently-live channel exposes an og:type of 'video...' and a
            # valid 11-character video id in its meta tags
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        # not live (or page unavailable): fall back to the channel page
        return self.url_result(base_url)
3265
3266
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor backed by the youtubei/v1/search JSON API."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # optional base64 filter blob appended to the query (see subclasses)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        # Yield up to n video entries for `query`, following continuation
        # tokens across result pages.
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # first page and continuation pages use different envelopes
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            # continuation token lives in the second section-list item
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3355
3356
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same search extractor, with the upload-date sort filter applied."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # URL-encoded base64 blob ('CAI=') selecting the sort-by-date filter
    _SEARCH_PARAMS = 'CAI%3D'
3362
3363
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # feeds are account-specific, so credentials are mandatory
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # derived from the subclass's feed name, e.g. 'youtube:watchlater'
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            if not new_ids:
                break

            ids.extend(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                headers=self._YOUTUBE_CLIENT_HEADERS)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3416
3417
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the Watch Later feed to the reserved 'WL' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch Later is just the reserved playlist id 'WL'; delegate to the
        # tab extractor, which handles the actual (authenticated) extraction.
        # (Dead fallback code that followed this return has been removed --
        # it was unreachable.)
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
3439
3440
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/recommended; all logic lives in the base class.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3446
3447
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/subscriptions; all logic lives in the base class.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3453
3454
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/history; all logic lives in the base class.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
3460
3461
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id and raise a helpful
    error instead of a generic 'unsupported URL' failure."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The usual cause is an unquoted URL whose '&v=...' part was eaten by
        # the shell; explain that instead of failing cryptically.
        # (Fixed the stray double space the message used to contain.)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply youtube-dl BaW_jenozKc .',
            expected=True)
3509
3510
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose video id is shorter than the canonical 11
    # characters, so the user gets a clear "truncated id" error instead of a
    # confusing extraction failure.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Never extracts anything -- the id is known to be incomplete.
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)
3526
3527
# Old extractors. TODO: verify these cases are handled by the newer extractors before removing.
3529
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extractor for search result-page URLs (youtube.com/results?...)."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _process_json_dict(self, obj, videos, c):
        # Collect renderer dicts that describe a video; when a dict instead
        # carries continuation data, remember its token for the next page.
        if "videoId" in obj:
            videos.append(obj)
        elif "nextContinuationData" in obj:
            c["continuation"] = obj["nextContinuationData"]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        search_query = compat_urllib_parse_unquote_plus(match.group('query'))
        webpage = self._download_webpage(url, search_query)
        entries = self._entries(webpage, search_query, max_pages=5)
        return self.playlist_result(entries, playlist_title=search_query)
3559
3560
class YoutubeShowIE(InfoExtractor):
    # Rewrites /show/<id> URLs to the show's /playlists page and delegates
    # extraction to the parent class.
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # NOTE(review): the direct base class here is InfoExtractor, whose
        # _real_extract is abstract, so this super() call looks like it would
        # raise NotImplementedError -- confirm the intended base class
        # (upstream youtube-dl used a playlists base extractor here).
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
3578
3579
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites feed to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it;
        # scrape that id and hand off to the playlist extractor.
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(favourites_playlist_id, 'YoutubePlaylist')