]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/youtube.py
f9e60f03ef42b430b78f4d9eb70e968900690939
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 ExtractorError,
34 float_or_none,
35 get_element_by_id,
36 int_or_none,
37 mimetype2ext,
38 parse_codecs,
39 parse_count,
40 parse_duration,
41 remove_quotes,
42 remove_start,
43 smuggle_url,
44 str_or_none,
45 str_to_int,
46 try_get,
47 unescapeHTML,
48 unified_strdate,
49 unsmuggle_url,
50 update_url_query,
51 uppercase_escape,
52 url_or_none,
53 urlencode_postdata,
54 urljoin,
55 )
56
57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account sign-in endpoints driven by _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the 'TL' token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # youtube.com URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist ID shapes: prefixed base64-ish IDs plus the special
    # RDMM/WL/LL/LM auto-generated lists
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Set the PREF cookie so YouTube serves English-language pages."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Convert a list of video IDs into url_result() dicts for YoutubeIE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the Google sign-in RPC flow; the response body
            # carries a junk prefix before the JSON array, which
            # transform_source strips off before parsing
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account by username.
        # NOTE(review): the positional null/constant layout below mirrors the
        # reverse-engineered wire format; the meaning of most slots is unknown,
        # so the ordering must not be changed.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # Opaque per-account token at [0][2]; identifies the account in all
        # subsequent requests of this flow
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password for the looked-up account
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        # A non-empty entry at [0][5] signals that the password step failed
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # If present, an additional challenge (e.g. two-step verification)
        # must be solved before the session is granted
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # 'TL' token required to address the TFA endpoint (_TFA_URL)
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Strip the optional 'G-' prefix users may copy from the app
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                # Same error-reporting shape as the password step above
                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved automatically; the user has
                # to resolve them in a browser
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Step 3: follow the CheckCookie URL to finalize the session cookies
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through/links to myaccount.google.com
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Wrap InfoExtractor._download_webpage_handle.

        Copies the 'query' dict first so the caller's mapping is never
        mutated by the superclass.
        """
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob embedded in a page.

        Returns the parsed dict, or None if the blob is absent or unparsable
        (both the regex search and the JSON parse are non-fatal).
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        """Set the language cookie and attempt login before any extraction."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Minimal innertube client context sent with every _call_api() request
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Regexes for the JSON blobs YouTube inlines into watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Marks the end of an inlined JSON blob, used to anchor the non-greedy
    # _YT_INITIAL_DATA_RE match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST to the youtubei (innertube) API endpoint 'ep'.

        'query' is merged over _DEFAULT_API_DATA to form the JSON request
        body; returns the parsed JSON response.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        # NOTE(review): the 'key' value looks like the hardcoded web-client
        # API key rather than a per-user secret -- confirm before rotating
        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Extract ytInitialData like _get_yt_initial_data, but fatally.

        First tries the boundary-anchored regex, then the bare one; raises
        (via _search_regex without a default) if neither matches.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the ytcfg.set({...}) config object from a page.

        Returns an empty dict when the pattern is missing and None when the
        JSON cannot be parsed (fatal=False).
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result dict from a video renderer object.

        'renderer' is a videoRenderer-style dict as found in ytInitialData;
        every field is read defensively via try_get, so missing keys simply
        yield None in the result.
        """
        video_id = renderer.get('videoId')
        # Title may be either a runs-list or a simpleText string
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Strip all whitespace first so grouped digits ("1 234 567") parse
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
364
365
366 class YoutubeIE(YoutubeBaseInfoExtractor):
367 IE_DESC = 'YouTube.com'
368 _VALID_URL = r"""(?x)^
369 (
370 (?:https?://|//) # http(s):// or protocol-independent URL
371 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
372 (?:www\.)?deturl\.com/www\.youtube\.com/|
373 (?:www\.)?pwnyoutube\.com/|
374 (?:www\.)?hooktube\.com/|
375 (?:www\.)?yourepeat\.com/|
376 tube\.majestyc\.net/|
377 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
378 (?:(?:www|dev)\.)?invidio\.us/|
379 (?:(?:www|no)\.)?invidiou\.sh/|
380 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
381 (?:www\.)?invidious\.kabi\.tk/|
382 (?:www\.)?invidious\.13ad\.de/|
383 (?:www\.)?invidious\.mastodon\.host/|
384 (?:www\.)?invidious\.zapashcanon\.fr/|
385 (?:www\.)?invidious\.kavin\.rocks/|
386 (?:www\.)?invidious\.tube/|
387 (?:www\.)?invidiou\.site/|
388 (?:www\.)?invidious\.site/|
389 (?:www\.)?invidious\.xyz/|
390 (?:www\.)?invidious\.nixnet\.xyz/|
391 (?:www\.)?invidious\.drycat\.fr/|
392 (?:www\.)?tube\.poal\.co/|
393 (?:www\.)?tube\.connect\.cafe/|
394 (?:www\.)?vid\.wxzm\.sx/|
395 (?:www\.)?vid\.mint\.lgbt/|
396 (?:www\.)?yewtu\.be/|
397 (?:www\.)?yt\.elukerio\.org/|
398 (?:www\.)?yt\.lelux\.fi/|
399 (?:www\.)?invidious\.ggc-project\.de/|
400 (?:www\.)?yt\.maisputain\.ovh/|
401 (?:www\.)?invidious\.13ad\.de/|
402 (?:www\.)?invidious\.toot\.koeln/|
403 (?:www\.)?invidious\.fdn\.fr/|
404 (?:www\.)?watch\.nettohikari\.com/|
405 (?:www\.)?kgg2m7yk5aybusll\.onion/|
406 (?:www\.)?qklhadlycap4cnod\.onion/|
407 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
408 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
409 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
410 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
411 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
412 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
413 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
414 (?:.*?\#/)? # handle anchor (#/) redirect urls
415 (?: # the various things that can precede the ID:
416 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
417 |(?: # or the v= param in all its forms
418 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
419 (?:\?|\#!?) # the params delimiter ? or # or #!
420 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
421 v=
422 )
423 ))
424 |(?:
425 youtu\.be| # just youtu.be/xxxx
426 vid\.plus| # or vid.plus/xxxx
427 zwearz\.com/watch| # or zwearz.com/watch/xxxx
428 )/
429 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
430 )
431 )? # all until now is optional -> you can pass the naked ID
432 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
433 (?!.*?\blist=
434 (?:
435 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
436 WL # WL are handled by the watch later IE
437 )
438 )
439 (?(1).+)? # if we found the ID, everything can follow
440 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
441 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
442 _PLAYER_INFO_RE = (
443 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
444 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
445 )
446 _formats = {
447 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
448 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
449 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
450 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
451 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
452 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
453 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
454 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
455 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
456 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
457 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
458 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
459 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
460 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
461 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
462 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
463 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
464 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
465
466
467 # 3D videos
468 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
469 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
470 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
471 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
472 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
473 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
474 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
475
476 # Apple HTTP Live Streaming
477 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
478 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
480 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
481 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
482 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
483 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
484 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
485
486 # DASH mp4 video
487 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
488 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
493 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
494 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
497 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
498 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
499
500 # Dash mp4 audio
501 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
502 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
503 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
504 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
505 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
506 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
507 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
508
509 # Dash webm
510 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
511 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
512 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
517 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
526 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
527 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
528 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
530 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
532
533 # Dash webm audio
534 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
535 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
536
537 # Dash webm audio with opus inside
538 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
539 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
540 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
541
542 # RTMP (unnamed)
543 '_rtmp': {'protocol': 'rtmp'},
544
545 # av01 video only formats sometimes served with "unknown" codecs
546 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
547 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
548 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
549 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
550 }
551 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
552
553 _GEO_BYPASS = False
554
555 IE_NAME = 'youtube'
556 _TESTS = [
557 {
558 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
559 'info_dict': {
560 'id': 'BaW_jenozKc',
561 'ext': 'mp4',
562 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
563 'uploader': 'Philipp Hagemeister',
564 'uploader_id': 'phihag',
565 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
566 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
567 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
568 'upload_date': '20121002',
569 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
570 'categories': ['Science & Technology'],
571 'tags': ['youtube-dl'],
572 'duration': 10,
573 'view_count': int,
574 'like_count': int,
575 'dislike_count': int,
576 'start_time': 1,
577 'end_time': 9,
578 }
579 },
580 {
581 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
582 'note': 'Embed-only video (#1746)',
583 'info_dict': {
584 'id': 'yZIXLfi8CZQ',
585 'ext': 'mp4',
586 'upload_date': '20120608',
587 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
588 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
589 'uploader': 'SET India',
590 'uploader_id': 'setindia',
591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
592 'age_limit': 18,
593 }
594 },
595 {
596 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
597 'note': 'Use the first video ID in the URL',
598 'info_dict': {
599 'id': 'BaW_jenozKc',
600 'ext': 'mp4',
601 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
602 'uploader': 'Philipp Hagemeister',
603 'uploader_id': 'phihag',
604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
605 'upload_date': '20121002',
606 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
607 'categories': ['Science & Technology'],
608 'tags': ['youtube-dl'],
609 'duration': 10,
610 'view_count': int,
611 'like_count': int,
612 'dislike_count': int,
613 },
614 'params': {
615 'skip_download': True,
616 },
617 },
618 {
619 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
620 'note': '256k DASH audio (format 141) via DASH manifest',
621 'info_dict': {
622 'id': 'a9LDPn-MO4I',
623 'ext': 'm4a',
624 'upload_date': '20121002',
625 'uploader_id': '8KVIDEO',
626 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
627 'description': '',
628 'uploader': '8KVIDEO',
629 'title': 'UHDTV TEST 8K VIDEO.mp4'
630 },
631 'params': {
632 'youtube_include_dash_manifest': True,
633 'format': '141',
634 },
635 'skip': 'format 141 not served anymore',
636 },
637 # DASH manifest with encrypted signature
638 {
639 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
640 'info_dict': {
641 'id': 'IB3lcPjvWLA',
642 'ext': 'm4a',
643 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
644 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
645 'duration': 244,
646 'uploader': 'AfrojackVEVO',
647 'uploader_id': 'AfrojackVEVO',
648 'upload_date': '20131011',
649 },
650 'params': {
651 'youtube_include_dash_manifest': True,
652 'format': '141/bestaudio[ext=m4a]',
653 },
654 },
655 # Controversy video
656 {
657 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
658 'info_dict': {
659 'id': 'T4XJQO3qol8',
660 'ext': 'mp4',
661 'duration': 219,
662 'upload_date': '20100909',
663 'uploader': 'Amazing Atheist',
664 'uploader_id': 'TheAmazingAtheist',
665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
666 'title': 'Burning Everyone\'s Koran',
667 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
668 }
669 },
670 # Normal age-gate video (embed allowed)
671 {
672 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
673 'info_dict': {
674 'id': 'HtVdAasjOgU',
675 'ext': 'mp4',
676 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
677 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
678 'duration': 142,
679 'uploader': 'The Witcher',
680 'uploader_id': 'WitcherGame',
681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
682 'upload_date': '20140605',
683 'age_limit': 18,
684 },
685 },
686 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
687 # YouTube Red ad is not captured for creator
688 {
689 'url': '__2ABJjxzNo',
690 'info_dict': {
691 'id': '__2ABJjxzNo',
692 'ext': 'mp4',
693 'duration': 266,
694 'upload_date': '20100430',
695 'uploader_id': 'deadmau5',
696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
697 'creator': 'Dada Life, deadmau5',
698 'description': 'md5:12c56784b8032162bb936a5f76d55360',
699 'uploader': 'deadmau5',
700 'title': 'Deadmau5 - Some Chords (HD)',
701 'alt_title': 'This Machine Kills Some Chords',
702 },
703 'expected_warnings': [
704 'DASH manifest missing',
705 ]
706 },
707 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
708 {
709 'url': 'lqQg6PlCWgI',
710 'info_dict': {
711 'id': 'lqQg6PlCWgI',
712 'ext': 'mp4',
713 'duration': 6085,
714 'upload_date': '20150827',
715 'uploader_id': 'olympic',
716 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
717 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
718 'uploader': 'Olympic',
719 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
720 },
721 'params': {
722 'skip_download': 'requires avconv',
723 }
724 },
725 # Non-square pixels
726 {
727 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
728 'info_dict': {
729 'id': '_b-2C3KPAM0',
730 'ext': 'mp4',
731 'stretched_ratio': 16 / 9.,
732 'duration': 85,
733 'upload_date': '20110310',
734 'uploader_id': 'AllenMeow',
735 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
736 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
737 'uploader': '孫ᄋᄅ',
738 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
739 },
740 },
741 # url_encoded_fmt_stream_map is empty string
742 {
743 'url': 'qEJwOuvDf7I',
744 'info_dict': {
745 'id': 'qEJwOuvDf7I',
746 'ext': 'webm',
747 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
748 'description': '',
749 'upload_date': '20150404',
750 'uploader_id': 'spbelect',
751 'uploader': 'Наблюдатели Петербурга',
752 },
753 'params': {
754 'skip_download': 'requires avconv',
755 },
756 'skip': 'This live event has ended.',
757 },
758 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
759 {
760 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
761 'info_dict': {
762 'id': 'FIl7x6_3R5Y',
763 'ext': 'webm',
764 'title': 'md5:7b81415841e02ecd4313668cde88737a',
765 'description': 'md5:116377fd2963b81ec4ce64b542173306',
766 'duration': 220,
767 'upload_date': '20150625',
768 'uploader_id': 'dorappi2000',
769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
770 'uploader': 'dorappi2000',
771 'formats': 'mincount:31',
772 },
773 'skip': 'not actual anymore',
774 },
775 # DASH manifest with segment_list
776 {
777 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
778 'md5': '8ce563a1d667b599d21064e982ab9e31',
779 'info_dict': {
780 'id': 'CsmdDsKjzN8',
781 'ext': 'mp4',
782 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
783 'uploader': 'Airtek',
784 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
785 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
786 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
787 },
788 'params': {
789 'youtube_include_dash_manifest': True,
790 'format': '135', # bestvideo
791 },
792 'skip': 'This live event has ended.',
793 },
794 {
795 # Multifeed videos (multiple cameras), URL is for Main Camera
796 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
797 'info_dict': {
798 'id': 'jqWvoWXjCVs',
799 'title': 'teamPGP: Rocket League Noob Stream',
800 'description': 'md5:dc7872fb300e143831327f1bae3af010',
801 },
802 'playlist': [{
803 'info_dict': {
804 'id': 'jqWvoWXjCVs',
805 'ext': 'mp4',
806 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
807 'description': 'md5:dc7872fb300e143831327f1bae3af010',
808 'duration': 7335,
809 'upload_date': '20150721',
810 'uploader': 'Beer Games Beer',
811 'uploader_id': 'beergamesbeer',
812 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
813 'license': 'Standard YouTube License',
814 },
815 }, {
816 'info_dict': {
817 'id': '6h8e8xoXJzg',
818 'ext': 'mp4',
819 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
820 'description': 'md5:dc7872fb300e143831327f1bae3af010',
821 'duration': 7337,
822 'upload_date': '20150721',
823 'uploader': 'Beer Games Beer',
824 'uploader_id': 'beergamesbeer',
825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
826 'license': 'Standard YouTube License',
827 },
828 }, {
829 'info_dict': {
830 'id': 'PUOgX5z9xZw',
831 'ext': 'mp4',
832 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
833 'description': 'md5:dc7872fb300e143831327f1bae3af010',
834 'duration': 7337,
835 'upload_date': '20150721',
836 'uploader': 'Beer Games Beer',
837 'uploader_id': 'beergamesbeer',
838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
839 'license': 'Standard YouTube License',
840 },
841 }, {
842 'info_dict': {
843 'id': 'teuwxikvS5k',
844 'ext': 'mp4',
845 'title': 'teamPGP: Rocket League Noob Stream (zim)',
846 'description': 'md5:dc7872fb300e143831327f1bae3af010',
847 'duration': 7334,
848 'upload_date': '20150721',
849 'uploader': 'Beer Games Beer',
850 'uploader_id': 'beergamesbeer',
851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
852 'license': 'Standard YouTube License',
853 },
854 }],
855 'params': {
856 'skip_download': True,
857 },
858 'skip': 'This video is not available.',
859 },
860 {
861 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
862 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
863 'info_dict': {
864 'id': 'gVfLd0zydlo',
865 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
866 },
867 'playlist_count': 2,
868 'skip': 'Not multifeed anymore',
869 },
870 {
871 'url': 'https://vid.plus/FlRa-iH7PGw',
872 'only_matching': True,
873 },
874 {
875 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
876 'only_matching': True,
877 },
878 {
879 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
880 # Also tests cut-off URL expansion in video description (see
881 # https://github.com/ytdl-org/youtube-dl/issues/1892,
882 # https://github.com/ytdl-org/youtube-dl/issues/8164)
883 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
884 'info_dict': {
885 'id': 'lsguqyKfVQg',
886 'ext': 'mp4',
887 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
888 'alt_title': 'Dark Walk - Position Music',
889 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
890 'duration': 133,
891 'upload_date': '20151119',
892 'uploader_id': 'IronSoulElf',
893 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
894 'uploader': 'IronSoulElf',
895 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
896 'track': 'Dark Walk - Position Music',
897 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
898 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
899 },
900 'params': {
901 'skip_download': True,
902 },
903 },
904 {
905 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
906 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
907 'only_matching': True,
908 },
909 {
910 # Video with yt:stretch=17:0
911 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
912 'info_dict': {
913 'id': 'Q39EVAstoRM',
914 'ext': 'mp4',
915 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
916 'description': 'md5:ee18a25c350637c8faff806845bddee9',
917 'upload_date': '20151107',
918 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
919 'uploader': 'CH GAMER DROID',
920 },
921 'params': {
922 'skip_download': True,
923 },
924 'skip': 'This video does not exist.',
925 },
926 {
927 # Video licensed under Creative Commons
928 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
929 'info_dict': {
930 'id': 'M4gD1WSo5mA',
931 'ext': 'mp4',
932 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
933 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
934 'duration': 721,
935 'upload_date': '20150127',
936 'uploader_id': 'BerkmanCenter',
937 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
938 'uploader': 'The Berkman Klein Center for Internet & Society',
939 'license': 'Creative Commons Attribution license (reuse allowed)',
940 },
941 'params': {
942 'skip_download': True,
943 },
944 },
945 {
946 # Channel-like uploader_url
947 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
948 'info_dict': {
949 'id': 'eQcmzGIKrzg',
950 'ext': 'mp4',
951 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
952 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
953 'duration': 4060,
954 'upload_date': '20151119',
955 'uploader': 'Bernie Sanders',
956 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
958 'license': 'Creative Commons Attribution license (reuse allowed)',
959 },
960 'params': {
961 'skip_download': True,
962 },
963 },
964 {
965 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
966 'only_matching': True,
967 },
968 {
969 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
970 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
971 'only_matching': True,
972 },
973 {
974 # Rental video preview
975 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
976 'info_dict': {
977 'id': 'uGpuVWrhIzE',
978 'ext': 'mp4',
979 'title': 'Piku - Trailer',
980 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
981 'upload_date': '20150811',
982 'uploader': 'FlixMatrix',
983 'uploader_id': 'FlixMatrixKaravan',
984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
985 'license': 'Standard YouTube License',
986 },
987 'params': {
988 'skip_download': True,
989 },
990 'skip': 'This video is not available.',
991 },
992 {
993 # YouTube Red video with episode data
994 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
995 'info_dict': {
996 'id': 'iqKdEhx-dD4',
997 'ext': 'mp4',
998 'title': 'Isolation - Mind Field (Ep 1)',
999 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1000 'duration': 2085,
1001 'upload_date': '20170118',
1002 'uploader': 'Vsauce',
1003 'uploader_id': 'Vsauce',
1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1005 'series': 'Mind Field',
1006 'season_number': 1,
1007 'episode_number': 1,
1008 },
1009 'params': {
1010 'skip_download': True,
1011 },
1012 'expected_warnings': [
1013 'Skipping DASH manifest',
1014 ],
1015 },
1016 {
1017 # The following content has been identified by the YouTube community
1018 # as inappropriate or offensive to some audiences.
1019 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1020 'info_dict': {
1021 'id': '6SJNVb0GnPI',
1022 'ext': 'mp4',
1023 'title': 'Race Differences in Intelligence',
1024 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1025 'duration': 965,
1026 'upload_date': '20140124',
1027 'uploader': 'New Century Foundation',
1028 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1030 },
1031 'params': {
1032 'skip_download': True,
1033 },
1034 },
1035 {
1036 # itag 212
1037 'url': '1t24XAntNCY',
1038 'only_matching': True,
1039 },
1040 {
1041 # geo restricted to JP
1042 'url': 'sJL6WA-aGkQ',
1043 'only_matching': True,
1044 },
1045 {
1046 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1047 'only_matching': True,
1048 },
1049 {
1050 # DRM protected
1051 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1052 'only_matching': True,
1053 },
1054 {
1055 # Video with unsupported adaptive stream type formats
1056 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1057 'info_dict': {
1058 'id': 'Z4Vy8R84T1U',
1059 'ext': 'mp4',
1060 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1061 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1062 'duration': 433,
1063 'upload_date': '20130923',
1064 'uploader': 'Amelia Putri Harwita',
1065 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1066 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1067 'formats': 'maxcount:10',
1068 },
1069 'params': {
1070 'skip_download': True,
1071 'youtube_include_dash_manifest': False,
1072 },
1073 'skip': 'not actual anymore',
1074 },
1075 {
1076 # Youtube Music Auto-generated description
1077 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1078 'info_dict': {
1079 'id': 'MgNrAu2pzNs',
1080 'ext': 'mp4',
1081 'title': 'Voyeur Girl',
1082 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1083 'upload_date': '20190312',
1084 'uploader': 'Stephen - Topic',
1085 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1086 'artist': 'Stephen',
1087 'track': 'Voyeur Girl',
1088 'album': 'it\'s too much love to know my dear',
1089 'release_date': '20190313',
1090 'release_year': 2019,
1091 },
1092 'params': {
1093 'skip_download': True,
1094 },
1095 },
1096 {
1097 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1098 'only_matching': True,
1099 },
1100 {
1101 # invalid -> valid video id redirection
1102 'url': 'DJztXj2GPfl',
1103 'info_dict': {
1104 'id': 'DJztXj2GPfk',
1105 'ext': 'mp4',
1106 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1107 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1108 'upload_date': '20090125',
1109 'uploader': 'Prochorowka',
1110 'uploader_id': 'Prochorowka',
1111 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1112 'artist': 'Panjabi MC',
1113 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1114 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1115 },
1116 'params': {
1117 'skip_download': True,
1118 },
1119 },
1120 {
1121 # empty description results in an empty string
1122 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1123 'info_dict': {
1124 'id': 'x41yOUIvK2k',
1125 'ext': 'mp4',
1126 'title': 'IMG 3456',
1127 'description': '',
1128 'upload_date': '20170613',
1129 'uploader_id': 'ElevageOrVert',
1130 'uploader': 'ElevageOrVert',
1131 },
1132 'params': {
1133 'skip_download': True,
1134 },
1135 },
1136 {
1137 # with '};' inside yt initial data (see [1])
1138 # see [2] for an example with '};' inside ytInitialPlayerResponse
1139 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1140 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1141 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1142 'info_dict': {
1143 'id': 'CHqg6qOn4no',
1144 'ext': 'mp4',
1145 'title': 'Part 77 Sort a list of simple types in c#',
1146 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1147 'upload_date': '20130831',
1148 'uploader_id': 'kudvenkat',
1149 'uploader': 'kudvenkat',
1150 },
1151 'params': {
1152 'skip_download': True,
1153 },
1154 },
1155 {
1156 # another example of '};' in ytInitialData
1157 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1158 'only_matching': True,
1159 },
1160 {
1161 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1162 'only_matching': True,
1163 },
1164 ]
1165
1166 def __init__(self, *args, **kwargs):
1167 super(YoutubeIE, self).__init__(*args, **kwargs)
1168 self._player_cache = {}
1169
1170 def report_video_info_webpage_download(self, video_id):
1171 """Report attempt to download video info webpage."""
1172 self.to_screen('%s: Downloading video info webpage' % video_id)
1173
1174 def report_information_extraction(self, video_id):
1175 """Report attempt to extract video information."""
1176 self.to_screen('%s: Extracting video information' % video_id)
1177
1178 def report_unavailable_format(self, video_id, format):
1179 """Report extracted video URL."""
1180 self.to_screen('%s: Format %s not available' % (video_id, format))
1181
1182 def report_rtmp_download(self):
1183 """Indicate the download will use the RTMP protocol."""
1184 self.to_screen('RTMP download detected')
1185
1186 def _signature_cache_id(self, example_sig):
1187 """ Return a string representation of a signature """
1188 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1189
1190 @classmethod
1191 def _extract_player_info(cls, player_url):
1192 for player_re in cls._PLAYER_INFO_RE:
1193 id_m = re.search(player_re, player_url)
1194 if id_m:
1195 break
1196 else:
1197 raise ExtractorError('Cannot identify player %r' % player_url)
1198 return id_m.group('ext'), id_m.group('id')
1199
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (and cache) the signature-decryption function for player_url.

        Returns a callable mapping an encrypted signature string to a working
        one.  The result is cached on disk keyed by player type/id and the
        layout of example_sig, so subsequent runs skip the player download.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id becomes a cache file name; it must not contain path parts
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of source-character indices
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Derive a cacheable index mapping by running the function on a probe
        # string that has the same length as example_sig
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1239
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the signature function func.

        Runs func on a probe string shaped like example_sig and renders the
        resulting character-index mapping as compact slice expressions
        (debug aid for the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # NOTE(review): assumes len(idxs) >= 2, otherwise 'i' below is
            # unbound — signatures always have several characters in practice
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it, or flush it as a slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new ascending/descending run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index
                    yield 's[%d]' % prev
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1278
    def _parse_sig_js(self, jscode):
        """Locate the signature-decipher function in the player JS code and
        return a callable that wraps it via JSInterpreter.

        The patterns are ordered from current to obsolete player layouts.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as its single (list-wrapped) argument
        return lambda s: initial_function([s])
1299
1300 def _parse_sig_swf(self, file_contents):
1301 swfi = SWFInterpreter(file_contents)
1302 TARGET_CLASSNAME = 'SignatureDecipher'
1303 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1304 initial_function = swfi.extract_function(searched_class, 'decipher')
1305 return lambda s: initial_function([s])
1306
1307 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1308 """Turn the encrypted s field into a working signature"""
1309
1310 if player_url is None:
1311 raise ExtractorError('Cannot decrypt signature without player_url')
1312
1313 if player_url.startswith('//'):
1314 player_url = 'https:' + player_url
1315 elif not re.match(r'https?://', player_url):
1316 player_url = compat_urlparse.urljoin(
1317 'https://www.youtube.com', player_url)
1318 try:
1319 player_id = (player_url, self._signature_cache_id(s))
1320 if player_id not in self._player_cache:
1321 func = self._extract_signature_function(
1322 video_id, player_url, s
1323 )
1324 self._player_cache[player_id] = func
1325 func = self._player_cache[player_id]
1326 if self._downloader.params.get('youtube_print_sig_code'):
1327 self._print_sig_code(func, s)
1328 return func(s)
1329 except Exception as e:
1330 tb = traceback.format_exc()
1331 raise ExtractorError(
1332 'Signature extraction failed: ' + tb, cause=e)
1333
    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
        """Return a dict mapping subtitle language codes to format lists.

        Queries the legacy timedtext track list for video_id; when
        has_live_chat_replay is true, also adds a 'live_chat' pseudo-subtitle
        handled by the youtube_live_chat_replay protocol downloader.
        Returns {} when no subtitles can be found or the request fails.
        """
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # The first track listed for a language wins
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if has_live_chat_replay:
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
1373
1374 def _get_ytplayer_config(self, video_id, webpage):
1375 patterns = (
1376 # User data may contain arbitrary character sequences that may affect
1377 # JSON extraction with regex, e.g. when '};' is contained the second
1378 # regex won't capture the whole JSON. Yet working around by trying more
1379 # concrete regex first keeping in mind proper quoted string handling
1380 # to be implemented in future that will replace this workaround (see
1381 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1382 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1383 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1384 r';ytplayer\.config\s*=\s*({.+?});',
1385 )
1386 config = self._search_regex(
1387 patterns, webpage, 'ytplayer.config', default=None)
1388 if config:
1389 return self._parse_json(
1390 uppercase_escape(config), video_id, fatal=False)
1391
    def _get_automatic_captions(self, video_id, player_response, player_config):
        """Return automatically-generated (translated) captions as a dict
        mapping language codes to format lists.

        player_response and player_config come from the already-downloaded
        watch page, so no extra page fetch is needed here.  Returns {} when
        no automatic captions are available.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not (player_response or player_config):
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config.get('args') if player_config else {}
            caption_url = args.get('ttsurl')
            # Oldest variant: a ttsurl plus timestamp in the player args
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the query
                # string of the base caption URL
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1488
1489 def _mark_watched(self, video_id, video_info, player_response):
1490 playback_url = url_or_none(try_get(
1491 player_response,
1492 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1493 video_info, lambda x: x['videostats_playback_base_url'][0]))
1494 if not playback_url:
1495 return
1496 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1497 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1498
1499 # cpn generation algorithm is reverse engineered from base.js.
1500 # In fact it works even with dummy cpn.
1501 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1502 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1503
1504 qs.update({
1505 'ver': ['2'],
1506 'cpn': [cpn],
1507 })
1508 playback_url = compat_urlparse.urlunparse(
1509 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1510
1511 self._download_webpage(
1512 playback_url, video_id, 'Marking watched',
1513 'Unable to mark watched', fatal=False)
1514
    @staticmethod
    def _extract_urls(webpage):
        """Return YouTube references embedded in webpage.

        Covers <iframe>/<embed>/<object>/SWFObject embeds (yielding full
        URLs) as well as lazyYT placeholders and the Wordpress "YouTube
        Video Importer" plugin (both yielding bare video ids).
        """
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1546
1547 @staticmethod
1548 def _extract_url(webpage):
1549 urls = YoutubeIE._extract_urls(webpage)
1550 return urls[0] if urls else None
1551
1552 @classmethod
1553 def extract_id(cls, url):
1554 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1555 if mobj is None:
1556 raise ExtractorError('Invalid URL: %s' % url)
1557 video_id = mobj.group(2)
1558 return video_id
1559
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract chapter markers from the chaptered player bar in the
        page's ytInitialData.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
        when the page carries no usable chapter data.
        """
        if not webpage:
            return
        data = self._extract_yt_initial_data(video_id, webpage)
        if not data or not isinstance(data, dict):
            return
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # timeRangeStartMillis is in milliseconds; convert to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # A chapter ends where the next one starts; the last one ends at
            # the full video duration
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1604
    @staticmethod
    def _extract_chapters_from_description(description, duration):
        """Parse chapter markers out of seekTo anchor links in the video
        description HTML.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
        when the description contains no timestamp links.
        """
        if not description:
            return None
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
            description)
        if not chapter_lines:
            return None
        chapters = []
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
                continue
            # Timestamps are expected in ascending order; a start beyond the
            # video duration ends the scan
            if start_time > duration:
                break
            # A chapter ends where the next one starts; the last one ends at
            # the full video duration
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
                continue
            if end_time > duration:
                end_time = duration
            if start_time > end_time:
                break
            # Strip the seekTo anchor markup and surrounding dashes/whitespace
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
            })
        return chapters
1639
1640 def _extract_chapters(self, webpage, description, video_id, duration):
1641 return (self._extract_chapters_from_json(webpage, video_id, duration)
1642 or self._extract_chapters_from_description(description, duration))
1643
1644 def _real_extract(self, url):
1645 url, smuggled_data = unsmuggle_url(url, {})
1646
1647 proto = (
1648 'http' if self._downloader.params.get('prefer_insecure', False)
1649 else 'https')
1650
1651 start_time = None
1652 end_time = None
1653 parsed_url = compat_urllib_parse_urlparse(url)
1654 for component in [parsed_url.fragment, parsed_url.query]:
1655 query = compat_parse_qs(component)
1656 if start_time is None and 't' in query:
1657 start_time = parse_duration(query['t'][0])
1658 if start_time is None and 'start' in query:
1659 start_time = parse_duration(query['start'][0])
1660 if end_time is None and 'end' in query:
1661 end_time = parse_duration(query['end'][0])
1662
1663 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1664 mobj = re.search(self._NEXT_URL_RE, url)
1665 if mobj:
1666 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1667 video_id = self.extract_id(url)
1668
1669 # Get video webpage
1670 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1671 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1672
1673 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1674 video_id = qs.get('v', [None])[0] or video_id
1675
1676 # Attempt to extract SWF player URL
1677 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1678 if mobj is not None:
1679 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1680 else:
1681 player_url = None
1682
1683 dash_mpds = []
1684
        def add_dash_mpd(video_info):
            # Queue the legacy 'dashmpd' manifest URL from a
            # compat_parse_qs-style video_info dict (values are lists);
            # appends to the enclosing dash_mpds list, skipping duplicates.
            dash_mpd = video_info.get('dashmpd')
            if dash_mpd and dash_mpd[0] not in dash_mpds:
                dash_mpds.append(dash_mpd[0])
1689
        def add_dash_mpd_pr(pl_response):
            # Queue the DASH manifest URL advertised by a player_response
            # dict (streamingData.dashManifestUrl); appends to the enclosing
            # dash_mpds list, skipping duplicates and non-URL values.
            dash_mpd = url_or_none(try_get(
                pl_response, lambda x: x['streamingData']['dashManifestUrl'],
                compat_str))
            if dash_mpd and dash_mpd not in dash_mpds:
                dash_mpds.append(dash_mpd)
1696
1697 is_live = None
1698 view_count = None
1699
1700 def extract_view_count(v_info):
1701 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1702
        def extract_player_response(player_response, video_id):
            # Parse a player_response JSON string into a dict.
            # Returns None for empty input or unparseable/non-dict JSON.
            # Side effect: registers any advertised DASH manifest URL via
            # add_dash_mpd_pr().
            pl_response = str_or_none(player_response)
            if not pl_response:
                return
            pl_response = self._parse_json(pl_response, video_id, fatal=False)
            if isinstance(pl_response, dict):
                add_dash_mpd_pr(pl_response)
                return pl_response
1711
        def extract_embedded_config(embed_webpage, video_id):
            # Grab the raw JSON argument of the setConfig({...}) call found
            # in the embed page markup; returns None when the pattern is
            # absent (the string is not parsed here).
            embedded_config = self._search_regex(
                r'setConfig\(({.*})\);',
                embed_webpage, 'ytInitialData', default=None)
            if embedded_config:
                return embedded_config
1718
1719 video_info = {}
1720 player_response = {}
1721 ytplayer_config = None
1722 embed_webpage = None
1723
1724 # Get video info
1725 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1726 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1727 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1728 age_gate = True
1729 # We simulate the access to the video from www.youtube.com/v/{video_id}
1730 # this can be viewed without login into Youtube
1731 url = proto + '://www.youtube.com/embed/%s' % video_id
1732 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1733 ext = extract_embedded_config(embed_webpage, video_id)
1734 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1735 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1736 if not playable_in_embed:
1737 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1738 playable_in_embed = ''
1739 else:
1740 playable_in_embed = playable_in_embed.group('playableinEmbed')
1741 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1742 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1743 if playable_in_embed == 'false':
1744 '''
1745 # TODO apply this patch when Support for Python 2.6(!) and above drops
1746 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1747 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1748 '''
1749 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1750 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1751 age_gate = False
1752 # Try looking directly into the video webpage
1753 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1754 if ytplayer_config:
1755 args = ytplayer_config.get("args")
1756 if args is not None:
1757 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1758 # Convert to the same format returned by compat_parse_qs
1759 video_info = dict((k, [v]) for k, v in args.items())
1760 add_dash_mpd(video_info)
1761 # Rental video is not rented but preview is available (e.g.
1762 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1763 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1764 if not video_info and args.get('ypc_vid'):
1765 return self.url_result(
1766 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1767 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1768 is_live = True
1769 if not player_response:
1770 player_response = extract_player_response(args.get('player_response'), video_id)
1771 elif not player_response:
1772 player_response = ytplayer_config
1773 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1774 add_dash_mpd_pr(player_response)
1775 else:
1776 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1777 else:
1778 data = compat_urllib_parse_urlencode({
1779 'video_id': video_id,
1780 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1781 'sts': self._search_regex(
1782 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1783 })
1784 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1785 try:
1786 video_info_webpage = self._download_webpage(
1787 video_info_url, video_id,
1788 note='Refetching age-gated info webpage',
1789 errnote='unable to download video info webpage')
1790 except ExtractorError:
1791 video_info_webpage = None
1792 if video_info_webpage:
1793 video_info = compat_parse_qs(video_info_webpage)
1794 pl_response = video_info.get('player_response', [None])[0]
1795 player_response = extract_player_response(pl_response, video_id)
1796 add_dash_mpd(video_info)
1797 view_count = extract_view_count(video_info)
1798 else:
1799 age_gate = False
1800 # Try looking directly into the video webpage
1801 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1802 if ytplayer_config:
1803 args = ytplayer_config.get('args', {})
1804 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1805 # Convert to the same format returned by compat_parse_qs
1806 video_info = dict((k, [v]) for k, v in args.items())
1807 add_dash_mpd(video_info)
1808 # Rental video is not rented but preview is available (e.g.
1809 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1810 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1811 if not video_info and args.get('ypc_vid'):
1812 return self.url_result(
1813 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1814 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1815 is_live = True
1816 if not player_response:
1817 player_response = extract_player_response(args.get('player_response'), video_id)
1818 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1819 add_dash_mpd_pr(player_response)
1820
1821 if not video_info and not player_response:
1822 player_response = extract_player_response(
1823 self._search_regex(
1824 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1825 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1826 'initial player response', default='{}'),
1827 video_id)
1828
        def extract_unavailable_message():
            # Collect the "unavailable" heading (<h1 id="unavailable-message">)
            # and sub-message (<div id="unavailable-submessage">) from the
            # watch page; returns them newline-joined, or None when neither
            # is present.
            messages = []
            for tag, kind in (('h1', 'message'), ('div', 'submessage')):
                msg = self._html_search_regex(
                    r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
                    video_webpage, 'unavailable %s' % kind, default=None)
                if msg:
                    messages.append(msg)
            if messages:
                return '\n'.join(messages)
1839
1840 if not video_info and not player_response:
1841 unavailable_message = extract_unavailable_message()
1842 if not unavailable_message:
1843 unavailable_message = 'Unable to extract video data'
1844 raise ExtractorError(
1845 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1846
1847 if not isinstance(video_info, dict):
1848 video_info = {}
1849
1850 playable_in_embed = try_get(
1851 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1852
1853 video_details = try_get(
1854 player_response, lambda x: x['videoDetails'], dict) or {}
1855
1856 microformat = try_get(
1857 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1858
1859 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1860 if not video_title:
1861 self._downloader.report_warning('Unable to extract video title')
1862 video_title = '_'
1863
1864 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1865 if video_description:
1866
            def replace_url(m):
                # re.sub callback for links in the description HTML: resolve
                # the matched href/title against the watch page URL; if it is
                # a youtube.com/redirect wrapper, unwrap it to its 'q' query
                # parameter, otherwise return the absolute URL as-is.
                redir_url = compat_urlparse.urljoin(url, m.group(1))
                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
                    qs = compat_parse_qs(parsed_redir_url.query)
                    q = qs.get('q')
                    if q and q[0]:
                        return q[0]
                return redir_url
1876
1877 description_original = video_description = re.sub(r'''(?x)
1878 <a\s+
1879 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1880 (?:title|href)="([^"]+)"\s+
1881 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1882 class="[^"]*"[^>]*>
1883 [^<]+\.{3}\s*
1884 </a>
1885 ''', replace_url, video_description)
1886 video_description = clean_html(video_description)
1887 else:
1888 video_description = video_details.get('shortDescription')
1889 if video_description is None:
1890 video_description = self._html_search_meta('description', video_webpage)
1891
1892 if not smuggled_data.get('force_singlefeed', False):
1893 if not self._downloader.params.get('noplaylist'):
1894 multifeed_metadata_list = try_get(
1895 player_response,
1896 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1897 compat_str) or try_get(
1898 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1899 if multifeed_metadata_list:
1900 entries = []
1901 feed_ids = []
1902 for feed in multifeed_metadata_list.split(','):
1903 # Unquote should take place before split on comma (,) since textual
1904 # fields may contain comma as well (see
1905 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1906 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1907
                        def feed_entry(name):
                            # First value for *name* in this feed's parsed query data
                            return try_get(feed_data, lambda x: x[name][0], compat_str)
1910
1911 feed_id = feed_entry('id')
1912 if not feed_id:
1913 continue
1914 feed_title = feed_entry('title')
1915 title = video_title
1916 if feed_title:
1917 title += ' (%s)' % feed_title
1918 entries.append({
1919 '_type': 'url_transparent',
1920 'ie_key': 'Youtube',
1921 'url': smuggle_url(
1922 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1923 {'force_singlefeed': True}),
1924 'title': title,
1925 })
1926 feed_ids.append(feed_id)
1927 self.to_screen(
1928 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1929 % (', '.join(feed_ids), video_id))
1930 return self.playlist_result(entries, video_id, video_title, video_description)
1931 else:
1932 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1933
1934 if view_count is None:
1935 view_count = extract_view_count(video_info)
1936 if view_count is None and video_details:
1937 view_count = int_or_none(video_details.get('viewCount'))
1938 if view_count is None and microformat:
1939 view_count = int_or_none(microformat.get('viewCount'))
1940
1941 if is_live is None:
1942 is_live = bool_or_none(video_details.get('isLive'))
1943
1944 has_live_chat_replay = False
1945 if not is_live:
1946 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1947 try:
1948 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1949 has_live_chat_replay = True
1950 except (KeyError, IndexError, TypeError):
1951 pass
1952
1953 # Check for "rental" videos
1954 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1955 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1956
        def _extract_filesize(media_url):
            # Filesize is often embedded in the media URL itself as
            # 'clen=<bytes>' or '/clen/<bytes>'; None when absent.
            return int_or_none(self._search_regex(
                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1960
1961 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1962 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1963
1964 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1965 self.report_rtmp_download()
1966 formats = [{
1967 'format_id': '_rtmp',
1968 'protocol': 'rtmp',
1969 'url': video_info['conn'][0],
1970 'player_url': player_url,
1971 }]
1972 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1973 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1974 if 'rtmpe%3Dyes' in encoded_url_map:
1975 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1976 formats = []
1977 formats_spec = {}
1978 fmt_list = video_info.get('fmt_list', [''])[0]
1979 if fmt_list:
1980 for fmt in fmt_list.split(','):
1981 spec = fmt.split('/')
1982 if len(spec) > 1:
1983 width_height = spec[1].split('x')
1984 if len(width_height) == 2:
1985 formats_spec[spec[0]] = {
1986 'resolution': spec[1],
1987 'width': int_or_none(width_height[0]),
1988 'height': int_or_none(width_height[1]),
1989 }
1990 for fmt in streaming_formats:
1991 itag = str_or_none(fmt.get('itag'))
1992 if not itag:
1993 continue
1994 quality = fmt.get('quality')
1995 quality_label = fmt.get('qualityLabel') or quality
1996 formats_spec[itag] = {
1997 'asr': int_or_none(fmt.get('audioSampleRate')),
1998 'filesize': int_or_none(fmt.get('contentLength')),
1999 'format_note': quality_label,
2000 'fps': int_or_none(fmt.get('fps')),
2001 'height': int_or_none(fmt.get('height')),
2002 # bitrate for itag 43 is always 2147483647
2003 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2004 'width': int_or_none(fmt.get('width')),
2005 }
2006
2007 for fmt in streaming_formats:
2008 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2009 continue
2010 url = url_or_none(fmt.get('url'))
2011
2012 if not url:
2013 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2014 if not cipher:
2015 continue
2016 url_data = compat_parse_qs(cipher)
2017 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2018 if not url:
2019 continue
2020 else:
2021 cipher = None
2022 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2023
2024 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2025 # Unsupported FORMAT_STREAM_TYPE_OTF
2026 if stream_type == 3:
2027 continue
2028
2029 format_id = fmt.get('itag') or url_data['itag'][0]
2030 if not format_id:
2031 continue
2032 format_id = compat_str(format_id)
2033
2034 if cipher:
2035 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2036 ASSETS_RE = (
2037 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2038 r'"jsUrl"\s*:\s*("[^"]+")',
2039 r'"assets":.+?"js":\s*("[^"]+")')
2040 jsplayer_url_json = self._search_regex(
2041 ASSETS_RE,
2042 embed_webpage if age_gate else video_webpage,
2043 'JS player URL (1)', default=None)
2044 if not jsplayer_url_json and not age_gate:
2045 # We need the embed website after all
2046 if embed_webpage is None:
2047 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2048 embed_webpage = self._download_webpage(
2049 embed_url, video_id, 'Downloading embed webpage')
2050 jsplayer_url_json = self._search_regex(
2051 ASSETS_RE, embed_webpage, 'JS player URL')
2052
2053 player_url = json.loads(jsplayer_url_json)
2054 if player_url is None:
2055 player_url_json = self._search_regex(
2056 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2057 video_webpage, 'age gate player URL')
2058 player_url = json.loads(player_url_json)
2059
2060 if 'sig' in url_data:
2061 url += '&signature=' + url_data['sig'][0]
2062 elif 's' in url_data:
2063 encrypted_sig = url_data['s'][0]
2064
2065 if self._downloader.params.get('verbose'):
2066 if player_url is None:
2067 player_desc = 'unknown'
2068 else:
2069 player_type, player_version = self._extract_player_info(player_url)
2070 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2071 parts_sizes = self._signature_cache_id(encrypted_sig)
2072 self.to_screen('{%s} signature length %s, %s' %
2073 (format_id, parts_sizes, player_desc))
2074
2075 signature = self._decrypt_signature(
2076 encrypted_sig, video_id, player_url, age_gate)
2077 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2078 url += '&%s=%s' % (sp, signature)
2079 if 'ratebypass' not in url:
2080 url += '&ratebypass=yes'
2081
2082 dct = {
2083 'format_id': format_id,
2084 'url': url,
2085 'player_url': player_url,
2086 }
2087 if format_id in self._formats:
2088 dct.update(self._formats[format_id])
2089 if format_id in formats_spec:
2090 dct.update(formats_spec[format_id])
2091
2092 # Some itags are not included in DASH manifest thus corresponding formats will
2093 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2094 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2095 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2096 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2097
2098 if width is None:
2099 width = int_or_none(fmt.get('width'))
2100 if height is None:
2101 height = int_or_none(fmt.get('height'))
2102
2103 filesize = int_or_none(url_data.get(
2104 'clen', [None])[0]) or _extract_filesize(url)
2105
2106 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2107 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2108
2109 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2110 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2111 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2112
2113 more_fields = {
2114 'filesize': filesize,
2115 'tbr': tbr,
2116 'width': width,
2117 'height': height,
2118 'fps': fps,
2119 'format_note': quality_label or quality,
2120 }
2121 for key, value in more_fields.items():
2122 if value:
2123 dct[key] = value
2124 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2125 if type_:
2126 type_split = type_.split(';')
2127 kind_ext = type_split[0].split('/')
2128 if len(kind_ext) == 2:
2129 kind, _ = kind_ext
2130 dct['ext'] = mimetype2ext(type_split[0])
2131 if kind in ('audio', 'video'):
2132 codecs = None
2133 for mobj in re.finditer(
2134 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2135 if mobj.group('key') == 'codecs':
2136 codecs = mobj.group('val')
2137 break
2138 if codecs:
2139 dct.update(parse_codecs(codecs))
2140 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2141 dct['downloader_options'] = {
2142 # Youtube throttles chunks >~10M
2143 'http_chunk_size': 10485760,
2144 }
2145 formats.append(dct)
2146 else:
2147 manifest_url = (
2148 url_or_none(try_get(
2149 player_response,
2150 lambda x: x['streamingData']['hlsManifestUrl'],
2151 compat_str))
2152 or url_or_none(try_get(
2153 video_info, lambda x: x['hlsvp'][0], compat_str)))
2154 if manifest_url:
2155 formats = []
2156 m3u8_formats = self._extract_m3u8_formats(
2157 manifest_url, video_id, 'mp4', fatal=False)
2158 for a_format in m3u8_formats:
2159 itag = self._search_regex(
2160 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2161 if itag:
2162 a_format['format_id'] = itag
2163 if itag in self._formats:
2164 dct = self._formats[itag].copy()
2165 dct.update(a_format)
2166 a_format = dct
2167 a_format['player_url'] = player_url
2168 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2169 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2170 if self._downloader.params.get('youtube_include_hls_manifest', True):
2171 formats.append(a_format)
2172 else:
2173 error_message = extract_unavailable_message()
2174 if not error_message:
2175 reason_list = try_get(
2176 player_response,
2177 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2178 list) or []
2179 for reason in reason_list:
2180 if not isinstance(reason, dict):
2181 continue
2182 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2183 if reason_text:
2184 if not error_message:
2185 error_message = ''
2186 error_message += reason_text
2187 if error_message:
2188 error_message = clean_html(error_message)
2189 if not error_message:
2190 error_message = clean_html(try_get(
2191 player_response, lambda x: x['playabilityStatus']['reason'],
2192 compat_str))
2193 if not error_message:
2194 error_message = clean_html(
2195 try_get(video_info, lambda x: x['reason'][0], compat_str))
2196 if error_message:
2197 raise ExtractorError(error_message, expected=True)
2198 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2199
2200 # uploader
2201 video_uploader = try_get(
2202 video_info, lambda x: x['author'][0],
2203 compat_str) or str_or_none(video_details.get('author'))
2204 if video_uploader:
2205 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2206 else:
2207 self._downloader.report_warning('unable to extract uploader name')
2208
2209 # uploader_id
2210 video_uploader_id = None
2211 video_uploader_url = None
2212 mobj = re.search(
2213 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2214 video_webpage)
2215 if mobj is not None:
2216 video_uploader_id = mobj.group('uploader_id')
2217 video_uploader_url = mobj.group('uploader_url')
2218 else:
2219 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2220 if owner_profile_url:
2221 video_uploader_id = self._search_regex(
2222 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2223 default=None)
2224 video_uploader_url = owner_profile_url
2225
2226 channel_id = (
2227 str_or_none(video_details.get('channelId'))
2228 or self._html_search_meta(
2229 'channelId', video_webpage, 'channel id', default=None)
2230 or self._search_regex(
2231 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2232 video_webpage, 'channel id', default=None, group='id'))
2233 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2234
2235 thumbnails = []
2236 thumbnails_list = try_get(
2237 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2238 for t in thumbnails_list:
2239 if not isinstance(t, dict):
2240 continue
2241 thumbnail_url = url_or_none(t.get('url'))
2242 if not thumbnail_url:
2243 continue
2244 thumbnails.append({
2245 'url': thumbnail_url,
2246 'width': int_or_none(t.get('width')),
2247 'height': int_or_none(t.get('height')),
2248 })
2249
2250 if not thumbnails:
2251 video_thumbnail = None
2252 # We try first to get a high quality image:
2253 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2254 video_webpage, re.DOTALL)
2255 if m_thumb is not None:
2256 video_thumbnail = m_thumb.group(1)
2257 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2258 if thumbnail_url:
2259 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2260 if video_thumbnail:
2261 thumbnails.append({'url': video_thumbnail})
2262
2263 # upload date
2264 upload_date = self._html_search_meta(
2265 'datePublished', video_webpage, 'upload date', default=None)
2266 if not upload_date:
2267 upload_date = self._search_regex(
2268 [r'(?s)id="eow-date.*?>(.*?)</span>',
2269 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2270 video_webpage, 'upload date', default=None)
2271 if not upload_date:
2272 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2273 upload_date = unified_strdate(upload_date)
2274
2275 video_license = self._html_search_regex(
2276 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2277 video_webpage, 'license', default=None)
2278
2279 m_music = re.search(
2280 r'''(?x)
2281 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2282 <ul[^>]*>\s*
2283 <li>(?P<title>.+?)
2284 by (?P<creator>.+?)
2285 (?:
2286 \(.+?\)|
2287 <a[^>]*
2288 (?:
2289 \bhref=["\']/red[^>]*>| # drop possible
2290 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2291 )
2292 .*?
2293 )?</li
2294 ''',
2295 video_webpage)
2296 if m_music:
2297 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2298 video_creator = clean_html(m_music.group('creator'))
2299 else:
2300 video_alt_title = video_creator = None
2301
        def extract_meta(field):
            # Value of a titled metadata row (e.g. 'Song', 'Artist', 'Album')
            # in the classic watch page markup; None when the row is absent.
            return self._html_search_regex(
                r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
                video_webpage, field, default=None)
2306
2307 track = extract_meta('Song')
2308 artist = extract_meta('Artist')
2309 album = extract_meta('Album')
2310
2311 # Youtube Music Auto-generated description
2312 release_date = release_year = None
2313 if video_description:
2314 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2315 if mobj:
2316 if not track:
2317 track = mobj.group('track').strip()
2318 if not artist:
2319 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2320 if not album:
2321 album = mobj.group('album'.strip())
2322 release_year = mobj.group('release_year')
2323 release_date = mobj.group('release_date')
2324 if release_date:
2325 release_date = release_date.replace('-', '')
2326 if not release_year:
2327 release_year = int(release_date[:4])
2328 if release_year:
2329 release_year = int(release_year)
2330
2331 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2332 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2333 for content in contents:
2334 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2335 multiple_songs = False
2336 for row in rows:
2337 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2338 multiple_songs = True
2339 break
2340 for row in rows:
2341 mrr = row.get('metadataRowRenderer') or {}
2342 mrr_title = try_get(
2343 mrr, lambda x: x['title']['simpleText'], compat_str)
2344 mrr_contents = try_get(
2345 mrr, lambda x: x['contents'][0], dict) or {}
2346 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2347 if not (mrr_title and mrr_contents_text):
2348 continue
2349 if mrr_title == 'License':
2350 video_license = mrr_contents_text
2351 elif not multiple_songs:
2352 if mrr_title == 'Album':
2353 album = mrr_contents_text
2354 elif mrr_title == 'Artist':
2355 artist = mrr_contents_text
2356 elif mrr_title == 'Song':
2357 track = mrr_contents_text
2358
2359 m_episode = re.search(
2360 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2361 video_webpage)
2362 if m_episode:
2363 series = unescapeHTML(m_episode.group('series'))
2364 season_number = int(m_episode.group('season'))
2365 episode_number = int(m_episode.group('episode'))
2366 else:
2367 series = season_number = episode_number = None
2368
2369 m_cat_container = self._search_regex(
2370 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2371 video_webpage, 'categories', default=None)
2372 category = None
2373 if m_cat_container:
2374 category = self._html_search_regex(
2375 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2376 default=None)
2377 if not category:
2378 category = try_get(
2379 microformat, lambda x: x['category'], compat_str)
2380 video_categories = None if category is None else [category]
2381
2382 video_tags = [
2383 unescapeHTML(m.group('content'))
2384 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2385 if not video_tags:
2386 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2387
        def _extract_count(count_name):
            # Like/dislike count from either the old button markup or the
            # newer accessibility-label JSON; parsed to int via str_to_int
            # (handles thousands separators), None when not found.
            return str_to_int(self._search_regex(
                (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
                 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
                video_webpage, count_name, default=None))
2393
2394 like_count = _extract_count('like')
2395 dislike_count = _extract_count('dislike')
2396
2397 if view_count is None:
2398 view_count = str_to_int(self._search_regex(
2399 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2400 'view count', default=None))
2401
2402 average_rating = (
2403 float_or_none(video_details.get('averageRating'))
2404 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2405
2406 # subtitles
2407 video_subtitles = self.extract_subtitles(
2408 video_id, video_webpage, has_live_chat_replay)
2409 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2410
2411 video_duration = try_get(
2412 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2413 if not video_duration:
2414 video_duration = int_or_none(video_details.get('lengthSeconds'))
2415 if not video_duration:
2416 video_duration = parse_duration(self._html_search_meta(
2417 'duration', video_webpage, 'video duration'))
2418
2419 # Get Subscriber Count of channel
2420 subscriber_count = parse_count(self._search_regex(
2421 r'"text":"([\d\.]+\w?) subscribers"',
2422 video_webpage,
2423 'subscriber count',
2424 default=None
2425 ))
2426
2427 # get xsrf for annotations or comments
2428 get_annotations = self._downloader.params.get('writeannotations', False)
2429 get_comments = self._downloader.params.get('getcomments', False)
2430 if get_annotations or get_comments:
2431 xsrf_token = None
2432 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2433 if ytcfg:
2434 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2435 if not xsrf_token:
2436 xsrf_token = self._search_regex(
2437 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2438 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2439
2440 # annotations
2441 video_annotations = None
2442 if get_annotations:
2443 invideo_url = try_get(
2444 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2445 if xsrf_token and invideo_url:
2446 xsrf_field_name = None
2447 if ytcfg:
2448 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2449 if not xsrf_field_name:
2450 xsrf_field_name = self._search_regex(
2451 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2452 video_webpage, 'xsrf field name',
2453 group='xsrf_field_name', default='session_token')
2454 video_annotations = self._download_webpage(
2455 self._proto_relative_url(invideo_url),
2456 video_id, note='Downloading annotations',
2457 errnote='Unable to download video annotations', fatal=False,
2458 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2459
2460 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2461
2462 # Get comments
2463 # TODO: Refactor and move to seperate function
2464 if get_comments:
2465 expected_video_comment_count = 0
2466 video_comments = []
2467
2468 def find_value(html, key, num_chars=2, separator='"'):
2469 pos_begin = html.find(key) + len(key) + num_chars
2470 pos_end = html.find(separator, pos_begin)
2471 return html[pos_begin: pos_end]
2472
2473 def search_dict(partial, key):
2474 if isinstance(partial, dict):
2475 for k, v in partial.items():
2476 if k == key:
2477 yield v
2478 else:
2479 for o in search_dict(v, key):
2480 yield o
2481 elif isinstance(partial, list):
2482 for i in partial:
2483 for o in search_dict(i, key):
2484 yield o
2485
2486 try:
2487 ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
2488 continuations = [ncd['continuation']]
2489 # Handle videos where comments have been disabled entirely
2490 except StopIteration:
2491 continuations = []
2492
2493 def get_continuation(continuation, session_token, replies=False):
2494 query = {
2495 'pbj': 1,
2496 'ctoken': continuation,
2497 }
2498 if replies:
2499 query['action_get_comment_replies'] = 1
2500 else:
2501 query['action_get_comments'] = 1
2502
2503 while True:
2504 content, handle = self._download_webpage_handle(
2505 'https://www.youtube.com/comment_service_ajax',
2506 video_id,
2507 note=False,
2508 expected_status=[413],
2509 data=urlencode_postdata({
2510 'session_token': session_token
2511 }),
2512 query=query,
2513 headers={
2514 'Accept': '*/*',
2515 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
2516 'X-YouTube-Client-Name': '1',
2517 'X-YouTube-Client-Version': '2.20201202.06.01'
2518 }
2519 )
2520
2521 response_code = handle.getcode()
2522 if (response_code == 200):
2523 return self._parse_json(content, video_id)
2524 if (response_code == 413):
2525 return None
2526 raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2527
2528 first_continuation = True
2529 while continuations:
2530 continuation, itct = continuations.pop()
2531 comment_response = get_continuation(continuation, xsrf_token)
2532 if not comment_response:
2533 continue
2534 if list(search_dict(comment_response, 'externalErrorMessage')):
2535 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2536
2537 if 'continuationContents' not in comment_response['response']:
2538 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2539 continue
2540 # not sure if this actually helps
2541 if 'xsrf_token' in comment_response:
2542 xsrf_token = comment_response['xsrf_token']
2543
2544 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2545 if first_continuation:
2546 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2547 first_continuation = False
2548 if 'contents' not in item_section:
2549 # continuation returned no comments?
2550 # set an empty array as to not break the for loop
2551 item_section['contents'] = []
2552
2553 for meta_comment in item_section['contents']:
2554 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2555 video_comments.append({
2556 'id': comment['commentId'],
2557 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
2558 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
2559 'author': comment.get('authorText', {}).get('simpleText', ''),
2560 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2561 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2562 'parent': 'root'
2563 })
2564 if 'replies' not in meta_comment['commentThreadRenderer']:
2565 continue
2566
2567 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2568 while reply_continuations:
2569 time.sleep(1)
2570 continuation = reply_continuations.pop()
2571 replies_data = get_continuation(continuation, xsrf_token, True)
2572 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
2573 continue
2574
2575 if self._downloader.params.get('verbose', False):
2576 self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
2577 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
2578 for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']:
2579 reply_comment = reply_meta['commentRenderer']
2580 video_comments.append({
2581 'id': reply_comment['commentId'],
2582 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
2583 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
2584 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2585 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2586 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2587 'parent': comment['commentId']
2588 })
2589 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
2590 continue
2591
2592 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
2593
2594 self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2595
2596 if 'continuations' in item_section:
2597 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
2598 time.sleep(1)
2599
2600 self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2601 else:
2602 expected_video_comment_count = None
2603 video_comments = None
2604
2605 # Look for the DASH manifest
2606 if self._downloader.params.get('youtube_include_dash_manifest', True):
2607 dash_mpd_fatal = True
2608 for mpd_url in dash_mpds:
2609 dash_formats = {}
2610 try:
2611 def decrypt_sig(mobj):
2612 s = mobj.group(1)
2613 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2614 return '/signature/%s' % dec_s
2615
2616 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2617
2618 for df in self._extract_mpd_formats(
2619 mpd_url, video_id, fatal=dash_mpd_fatal,
2620 formats_dict=self._formats):
2621 if not df.get('filesize'):
2622 df['filesize'] = _extract_filesize(df['url'])
2623 # Do not overwrite DASH format found in some previous DASH manifest
2624 if df['format_id'] not in dash_formats:
2625 dash_formats[df['format_id']] = df
2626 # Additional DASH manifests may end up in HTTP Error 403 therefore
2627 # allow them to fail without bug report message if we already have
2628 # some DASH manifest succeeded. This is temporary workaround to reduce
2629 # burst of bug reports until we figure out the reason and whether it
2630 # can be fixed at all.
2631 dash_mpd_fatal = False
2632 except (ExtractorError, KeyError) as e:
2633 self.report_warning(
2634 'Skipping DASH manifest: %r' % e, video_id)
2635 if dash_formats:
2636 # Remove the formats we found through non-DASH, they
2637 # contain less info and it can be wrong, because we use
2638 # fixed values (for example the resolution). See
2639 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2640 # example.
2641 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2642 formats.extend(dash_formats.values())
2643
2644 # Check for malformed aspect ratio
2645 stretched_m = re.search(
2646 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2647 video_webpage)
2648 if stretched_m:
2649 w = float(stretched_m.group('w'))
2650 h = float(stretched_m.group('h'))
2651 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2652 # We will only process correct ratios.
2653 if w > 0 and h > 0:
2654 ratio = w / h
2655 for f in formats:
2656 if f.get('vcodec') != 'none':
2657 f['stretched_ratio'] = ratio
2658
2659 if not formats:
2660 if 'reason' in video_info:
2661 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2662 regions_allowed = self._html_search_meta(
2663 'regionsAllowed', video_webpage, default=None)
2664 countries = regions_allowed.split(',') if regions_allowed else None
2665 self.raise_geo_restricted(
2666 msg=video_info['reason'][0], countries=countries)
2667 reason = video_info['reason'][0]
2668 if 'Invalid parameters' in reason:
2669 unavailable_message = extract_unavailable_message()
2670 if unavailable_message:
2671 reason = unavailable_message
2672 raise ExtractorError(
2673 'YouTube said: %s' % reason,
2674 expected=True, video_id=video_id)
2675 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2676 raise ExtractorError('This video is DRM protected.', expected=True)
2677
2678 self._sort_formats(formats)
2679
2680 self.mark_watched(video_id, video_info, player_response)
2681
2682 return {
2683 'id': video_id,
2684 'uploader': video_uploader,
2685 'uploader_id': video_uploader_id,
2686 'uploader_url': video_uploader_url,
2687 'channel_id': channel_id,
2688 'channel_url': channel_url,
2689 'upload_date': upload_date,
2690 'license': video_license,
2691 'creator': video_creator or artist,
2692 'title': video_title,
2693 'alt_title': video_alt_title or track,
2694 'thumbnails': thumbnails,
2695 'description': video_description,
2696 'categories': video_categories,
2697 'tags': video_tags,
2698 'subtitles': video_subtitles,
2699 'automatic_captions': automatic_captions,
2700 'duration': video_duration,
2701 'age_limit': 18 if age_gate else 0,
2702 'annotations': video_annotations,
2703 'chapters': chapters,
2704 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2705 'view_count': view_count,
2706 'like_count': like_count,
2707 'dislike_count': dislike_count,
2708 'average_rating': average_rating,
2709 'formats': formats,
2710 'is_live': is_live,
2711 'start_time': start_time,
2712 'end_time': end_time,
2713 'series': series,
2714 'season_number': season_number,
2715 'episode_number': episode_number,
2716 'track': track,
2717 'artist': artist,
2718 'album': album,
2719 'release_date': release_date,
2720 'release_year': release_year,
2721 'subscriber_count': subscriber_count,
2722 'playable_in_embed': playable_in_embed,
2723 'comments': video_comments,
2724 'comment_count': expected_video_comment_count,
2725 }
2726
2727
class YoutubeTabIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube "tab" pages: channels, users, playlists and feeds."""
    IE_DESC = 'YouTube.com tab'
    # Matches channel/c/user pages, playlist and watch-with-list URLs, and
    # feed pages.  The negative lookahead over _RESERVED_NAMES keeps known
    # site paths (e.g. /embed, /results) from being treated as the "direct
    # URL" form of a custom channel name.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?:channel|c|user)/|
                            (?P<not_channel>
                                feed/|
                                (?:playlist|watch)\?.*?\blist=
                            )|
                            (?!(?:%s)\b)  # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
    IE_NAME = 'youtube:tab'
2748
2749 _TESTS = [{
2750 # playlists, multipage
2751 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2752 'playlist_mincount': 94,
2753 'info_dict': {
2754 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2755 'title': 'Игорь Клейнер - Playlists',
2756 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2757 },
2758 }, {
2759 # playlists, multipage, different order
2760 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2761 'playlist_mincount': 94,
2762 'info_dict': {
2763 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2764 'title': 'Игорь Клейнер - Playlists',
2765 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2766 },
2767 }, {
2768 # playlists, singlepage
2769 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2770 'playlist_mincount': 4,
2771 'info_dict': {
2772 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2773 'title': 'ThirstForScience - Playlists',
2774 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2775 }
2776 }, {
2777 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2778 'only_matching': True,
2779 }, {
2780 # basic, single video playlist
2781 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2782 'info_dict': {
2783 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2784 'uploader': 'Sergey M.',
2785 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2786 'title': 'youtube-dl public playlist',
2787 },
2788 'playlist_count': 1,
2789 }, {
2790 # empty playlist
2791 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2792 'info_dict': {
2793 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2794 'uploader': 'Sergey M.',
2795 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2796 'title': 'youtube-dl empty playlist',
2797 },
2798 'playlist_count': 0,
2799 }, {
2800 # Home tab
2801 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2802 'info_dict': {
2803 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2804 'title': 'lex will - Home',
2805 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2806 },
2807 'playlist_mincount': 2,
2808 }, {
2809 # Videos tab
2810 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2811 'info_dict': {
2812 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2813 'title': 'lex will - Videos',
2814 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2815 },
2816 'playlist_mincount': 975,
2817 }, {
2818 # Videos tab, sorted by popular
2819 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2820 'info_dict': {
2821 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2822 'title': 'lex will - Videos',
2823 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2824 },
2825 'playlist_mincount': 199,
2826 }, {
2827 # Playlists tab
2828 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2829 'info_dict': {
2830 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2831 'title': 'lex will - Playlists',
2832 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2833 },
2834 'playlist_mincount': 17,
2835 }, {
2836 # Community tab
2837 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2838 'info_dict': {
2839 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2840 'title': 'lex will - Community',
2841 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2842 },
2843 'playlist_mincount': 18,
2844 }, {
2845 # Channels tab
2846 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2847 'info_dict': {
2848 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2849 'title': 'lex will - Channels',
2850 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2851 },
2852 'playlist_mincount': 138,
2853 }, {
2854 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2855 'only_matching': True,
2856 }, {
2857 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2858 'only_matching': True,
2859 }, {
2860 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2861 'only_matching': True,
2862 }, {
2863 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2864 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2865 'info_dict': {
2866 'title': '29C3: Not my department',
2867 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2868 'uploader': 'Christiaan008',
2869 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2870 },
2871 'playlist_count': 96,
2872 }, {
2873 'note': 'Large playlist',
2874 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2875 'info_dict': {
2876 'title': 'Uploads from Cauchemar',
2877 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2878 'uploader': 'Cauchemar',
2879 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2880 },
2881 'playlist_mincount': 1123,
2882 }, {
2883 # even larger playlist, 8832 videos
2884 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2885 'only_matching': True,
2886 }, {
2887 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2888 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2889 'info_dict': {
2890 'title': 'Uploads from Interstellar Movie',
2891 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2892 'uploader': 'Interstellar Movie',
2893 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2894 },
2895 'playlist_mincount': 21,
2896 }, {
2897 # https://github.com/ytdl-org/youtube-dl/issues/21844
2898 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2899 'info_dict': {
2900 'title': 'Data Analysis with Dr Mike Pound',
2901 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2902 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2903 'uploader': 'Computerphile',
2904 },
2905 'playlist_mincount': 11,
2906 }, {
2907 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2908 'only_matching': True,
2909 }, {
2910 # Playlist URL that does not actually serve a playlist
2911 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2912 'info_dict': {
2913 'id': 'FqZTN594JQw',
2914 'ext': 'webm',
2915 'title': "Smiley's People 01 detective, Adventure Series, Action",
2916 'uploader': 'STREEM',
2917 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2918 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2919 'upload_date': '20150526',
2920 'license': 'Standard YouTube License',
2921 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2922 'categories': ['People & Blogs'],
2923 'tags': list,
2924 'view_count': int,
2925 'like_count': int,
2926 'dislike_count': int,
2927 },
2928 'params': {
2929 'skip_download': True,
2930 },
2931 'skip': 'This video is not available.',
2932 'add_ie': [YoutubeIE.ie_key()],
2933 }, {
2934 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2935 'only_matching': True,
2936 }, {
2937 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2938 'only_matching': True,
2939 }, {
2940 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2941 'info_dict': {
2942 'id': '9Auq9mYxFEE',
2943 'ext': 'mp4',
2944 'title': 'Watch Sky News live',
2945 'uploader': 'Sky News',
2946 'uploader_id': 'skynews',
2947 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2948 'upload_date': '20191102',
2949 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2950 'categories': ['News & Politics'],
2951 'tags': list,
2952 'like_count': int,
2953 'dislike_count': int,
2954 },
2955 'params': {
2956 'skip_download': True,
2957 },
2958 }, {
2959 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2960 'info_dict': {
2961 'id': 'a48o2S1cPoo',
2962 'ext': 'mp4',
2963 'title': 'The Young Turks - Live Main Show',
2964 'uploader': 'The Young Turks',
2965 'uploader_id': 'TheYoungTurks',
2966 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2967 'upload_date': '20150715',
2968 'license': 'Standard YouTube License',
2969 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2970 'categories': ['News & Politics'],
2971 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2972 'like_count': int,
2973 'dislike_count': int,
2974 },
2975 'params': {
2976 'skip_download': True,
2977 },
2978 'only_matching': True,
2979 }, {
2980 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2981 'only_matching': True,
2982 }, {
2983 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2984 'only_matching': True,
2985 }, {
2986 'url': 'https://www.youtube.com/feed/trending',
2987 'only_matching': True,
2988 }, {
2989 # needs auth
2990 'url': 'https://www.youtube.com/feed/library',
2991 'only_matching': True,
2992 }, {
2993 # needs auth
2994 'url': 'https://www.youtube.com/feed/history',
2995 'only_matching': True,
2996 }, {
2997 # needs auth
2998 'url': 'https://www.youtube.com/feed/subscriptions',
2999 'only_matching': True,
3000 }, {
3001 # needs auth
3002 'url': 'https://www.youtube.com/feed/watch_later',
3003 'only_matching': True,
3004 }, {
3005 # no longer available?
3006 'url': 'https://www.youtube.com/feed/recommended',
3007 'only_matching': True,
3008 }, {
3009 # inline playlist with not always working continuations
3010 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3011 'only_matching': True,
3012 }, {
3013 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3014 'only_matching': True,
3015 }, {
3016 'url': 'https://www.youtube.com/course',
3017 'only_matching': True,
3018 }, {
3019 'url': 'https://www.youtube.com/zsecurity',
3020 'only_matching': True,
3021 }, {
3022 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3023 'only_matching': True,
3024 }, {
3025 'url': 'https://www.youtube.com/TheYoungTurks/live',
3026 'only_matching': True,
3027 }]
3028
3029 @classmethod
3030 def suitable(cls, url):
3031 return False if YoutubeIE.suitable(url) else super(
3032 YoutubeTabIE, cls).suitable(url)
3033
3034 def _extract_channel_id(self, webpage):
3035 channel_id = self._html_search_meta(
3036 'channelId', webpage, 'channel id', default=None)
3037 if channel_id:
3038 return channel_id
3039 channel_url = self._html_search_meta(
3040 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3041 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3042 'twitter:app:url:googleplay'), webpage, 'channel url')
3043 return self._search_regex(
3044 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3045 channel_url, 'channel id')
3046
3047 @staticmethod
3048 def _extract_grid_item_renderer(item):
3049 for item_kind in ('Playlist', 'Video', 'Channel'):
3050 renderer = item.get('grid%sRenderer' % item_kind)
3051 if renderer:
3052 return renderer
3053
3054 def _grid_entries(self, grid_renderer):
3055 for item in grid_renderer['items']:
3056 if not isinstance(item, dict):
3057 continue
3058 renderer = self._extract_grid_item_renderer(item)
3059 if not isinstance(renderer, dict):
3060 continue
3061 title = try_get(
3062 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3063 # playlist
3064 playlist_id = renderer.get('playlistId')
3065 if playlist_id:
3066 yield self.url_result(
3067 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3068 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3069 video_title=title)
3070 # video
3071 video_id = renderer.get('videoId')
3072 if video_id:
3073 yield self._extract_video(renderer)
3074 # channel
3075 channel_id = renderer.get('channelId')
3076 if channel_id:
3077 title = try_get(
3078 renderer, lambda x: x['title']['simpleText'], compat_str)
3079 yield self.url_result(
3080 'https://www.youtube.com/channel/%s' % channel_id,
3081 ie=YoutubeTabIE.ie_key(), video_title=title)
3082
3083 def _shelf_entries_from_content(self, shelf_renderer):
3084 content = shelf_renderer.get('content')
3085 if not isinstance(content, dict):
3086 return
3087 renderer = content.get('gridRenderer')
3088 if renderer:
3089 # TODO: add support for nested playlists so each shelf is processed
3090 # as separate playlist
3091 # TODO: this includes only first N items
3092 for entry in self._grid_entries(renderer):
3093 yield entry
3094 renderer = content.get('horizontalListRenderer')
3095 if renderer:
3096 # TODO
3097 pass
3098
3099 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3100 ep = try_get(
3101 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3102 compat_str)
3103 shelf_url = urljoin('https://www.youtube.com', ep)
3104 if shelf_url:
3105 # Skipping links to another channels, note that checking for
3106 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3107 # will not work
3108 if skip_channels and '/channels?' in shelf_url:
3109 return
3110 title = try_get(
3111 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3112 yield self.url_result(shelf_url, video_title=title)
3113 # Shelf may not contain shelf URL, fallback to extraction from content
3114 for entry in self._shelf_entries_from_content(shelf_renderer):
3115 yield entry
3116
3117 def _playlist_entries(self, video_list_renderer):
3118 for content in video_list_renderer['contents']:
3119 if not isinstance(content, dict):
3120 continue
3121 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3122 if not isinstance(renderer, dict):
3123 continue
3124 video_id = renderer.get('videoId')
3125 if not video_id:
3126 continue
3127 yield self._extract_video(renderer)
3128
3129 r""" # Not needed in the new implementation
3130 def _itemSection_entries(self, item_sect_renderer):
3131 for content in item_sect_renderer['contents']:
3132 if not isinstance(content, dict):
3133 continue
3134 renderer = content.get('videoRenderer', {})
3135 if not isinstance(renderer, dict):
3136 continue
3137 video_id = renderer.get('videoId')
3138 if not video_id:
3139 continue
3140 yield self._extract_video(renderer)
3141 """
3142
3143 def _rich_entries(self, rich_grid_renderer):
3144 renderer = try_get(
3145 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3146 video_id = renderer.get('videoId')
3147 if not video_id:
3148 return
3149 yield self._extract_video(renderer)
3150
3151 def _video_entry(self, video_renderer):
3152 video_id = video_renderer.get('videoId')
3153 if video_id:
3154 return self._extract_video(video_renderer)
3155
3156 def _post_thread_entries(self, post_thread_renderer):
3157 post_renderer = try_get(
3158 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3159 if not post_renderer:
3160 return
3161 # video attachment
3162 video_renderer = try_get(
3163 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
3164 video_id = None
3165 if video_renderer:
3166 entry = self._video_entry(video_renderer)
3167 if entry:
3168 yield entry
3169 # inline video links
3170 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3171 for run in runs:
3172 if not isinstance(run, dict):
3173 continue
3174 ep_url = try_get(
3175 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3176 if not ep_url:
3177 continue
3178 if not YoutubeIE.suitable(ep_url):
3179 continue
3180 ep_video_id = YoutubeIE._match_id(ep_url)
3181 if video_id == ep_video_id:
3182 continue
3183 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
3184
3185 def _post_thread_continuation_entries(self, post_thread_continuation):
3186 contents = post_thread_continuation.get('contents')
3187 if not isinstance(contents, list):
3188 return
3189 for content in contents:
3190 renderer = content.get('backstagePostThreadRenderer')
3191 if not isinstance(renderer, dict):
3192 continue
3193 for entry in self._post_thread_entries(renderer):
3194 yield entry
3195
3196 @staticmethod
3197 def _build_continuation_query(continuation, ctp=None):
3198 query = {
3199 'ctoken': continuation,
3200 'continuation': continuation,
3201 }
3202 if ctp:
3203 query['itct'] = ctp
3204 return query
3205
3206 @staticmethod
3207 def _extract_next_continuation_data(renderer):
3208 next_continuation = try_get(
3209 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
3210 if not next_continuation:
3211 return
3212 continuation = next_continuation.get('continuation')
3213 if not continuation:
3214 return
3215 ctp = next_continuation.get('clickTrackingParams')
3216 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3217
3218 @classmethod
3219 def _extract_continuation(cls, renderer):
3220 next_continuation = cls._extract_next_continuation_data(renderer)
3221 if next_continuation:
3222 return next_continuation
3223 contents = renderer.get('contents')
3224 if not isinstance(contents, list):
3225 return
3226 for content in contents:
3227 if not isinstance(content, dict):
3228 continue
3229 continuation_ep = try_get(
3230 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3231 dict)
3232 if not continuation_ep:
3233 continue
3234 continuation = try_get(
3235 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3236 if not continuation:
3237 continue
3238 ctp = continuation_ep.get('clickTrackingParams')
3239 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3240
    def _entries(self, tab, identity_token):
        """Generate all entries of a tab, following continuations (pagination)
        via browse_ajax until YouTube stops returning them.

        identity_token, when given, is sent as x-youtube-identity-token so
        logged-in content can be listed.
        """

        # extract_entries also records the next continuation into
        # continuation_list[0] as a side effect (a generator cannot simply
        # return it).
        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: may still be a rich item (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                # Dispatch each item-section content to the matching helper;
                # the first recognized renderer kind wins per content.
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # On the Channels tab, shelves legitimately link to
                        # other channels, so do not skip channel links there
                        is_channels_tab = tab.get('title') == 'Channels'
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # Single-element list used as a writable cell; Python 2 does not support nonlocal
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        # Web-client identification headers sent with browse_ajax requests
        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            # browse_ajax responds with a two-element array; the payload is at index 1
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Legacy continuation format: continuationContents
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # Newer continuation format: onResponseReceivedActions
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): this re-requests the same continuation
                    # token; verify this cannot loop forever on a malformed
                    # response
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3384
3385 @staticmethod
3386 def _extract_selected_tab(tabs):
3387 for tab in tabs:
3388 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3389 return tab['tabRenderer']
3390 else:
3391 raise ExtractorError('Unable to find selected tab')
3392
3393 @staticmethod
3394 def _extract_uploader(data):
3395 uploader = {}
3396 sidebar_renderer = try_get(
3397 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3398 if sidebar_renderer:
3399 for item in sidebar_renderer:
3400 if not isinstance(item, dict):
3401 continue
3402 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3403 if not isinstance(renderer, dict):
3404 continue
3405 owner = try_get(
3406 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3407 if owner:
3408 uploader['uploader'] = owner.get('text')
3409 uploader['uploader_id'] = try_get(
3410 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3411 uploader['uploader_url'] = urljoin(
3412 'https://www.youtube.com/',
3413 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3414 return uploader
3415
3416 def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3417 selected_tab = self._extract_selected_tab(tabs)
3418 renderer = try_get(
3419 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3420 playlist_id = title = description = None
3421 if renderer:
3422 channel_title = renderer.get('title') or item_id
3423 tab_title = selected_tab.get('title')
3424 title = channel_title or item_id
3425 if tab_title:
3426 title += ' - %s' % tab_title
3427 description = renderer.get('description')
3428 playlist_id = renderer.get('externalId')
3429
3430 # this has thumbnails, but there is currently no thumbnail field for playlists
3431 # sidebar.playlistSidebarRenderer has even more data, but its stucture is more complec
3432 renderer = try_get(
3433 data, lambda x: x['microformat']['microformatDataRenderer'], dict)
3434 if not renderer:
3435 renderer = try_get(
3436 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3437 if renderer:
3438 title = renderer.get('title')
3439 description = renderer.get('description')
3440 playlist_id = item_id
3441
3442 if playlist_id is None:
3443 playlist_id = item_id
3444 if title is None:
3445 title = "Youtube " + playlist_id.title()
3446 playlist = self.playlist_result(
3447 self._entries(selected_tab, identity_token),
3448 playlist_id=playlist_id, playlist_title=title,
3449 playlist_description=description)
3450 playlist.update(self._extract_uploader(data))
3451 return playlist
3452
3453 def _extract_from_playlist(self, item_id, url, data, playlist):
3454 title = playlist.get('title') or try_get(
3455 data, lambda x: x['titleText']['simpleText'], compat_str)
3456 playlist_id = playlist.get('playlistId') or item_id
3457 # Inline playlist rendition continuation does not always work
3458 # at Youtube side, so delegating regular tab-based playlist URL
3459 # processing whenever possible.
3460 playlist_url = urljoin(url, try_get(
3461 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3462 compat_str))
3463 if playlist_url and playlist_url != url:
3464 return self.url_result(
3465 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3466 video_title=title)
3467 return self.playlist_result(
3468 self._playlist_entries(playlist), playlist_id=playlist_id,
3469 playlist_title=title)
3470
3471 @staticmethod
3472 def _extract_alerts(data):
3473 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3474 if not isinstance(alert_dict, dict):
3475 continue
3476 for renderer in alert_dict:
3477 alert = alert_dict[renderer]
3478 alert_type = alert.get('type')
3479 if not alert_type:
3480 continue
3481 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3482 if message:
3483 yield alert_type, message
3484 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3485 message = try_get(run, lambda x: x['text'], compat_str)
3486 if message:
3487 yield alert_type, message
3488
3489 def _extract_identity_token(self, webpage, item_id):
3490 ytcfg = self._extract_ytcfg(item_id, webpage)
3491 if ytcfg:
3492 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3493 if token:
3494 return token
3495 return self._search_regex(
3496 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3497 'identity token', default=None)
3498
    def _real_extract(self, url):
        """Dispatch a tab/playlist/feed URL to the right extraction path.

        Order of attempts: tabbed page -> inline watch-page playlist ->
        single-video fallback; raises if nothing is recognized.
        """
        item_id = self._match_id(url)
        # Normalize the host so all subsequent requests hit www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # A bare channel/user home page defaults to the full /videos listing
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # watch-style URL without a video id: fall back to its playlist, if any
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Surface YouTube alerts: only the last error alert is raised; any
        # earlier errors and all non-error alerts become warnings.
        err_msg = None
        for alert_type, alert_message in self._extract_alerts(data):
            if alert_type.lower() == 'error':
                if err_msg:
                    self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
                err_msg = alert_message
            else:
                self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if err_msg:
            raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3558
3559
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything YoutubeTabIE can handle takes precedence over this IE.
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE via a canonical playlist URL."""
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string; synthesize one.
        qs = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3634
3635
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite a short youtu.be link with a list param to a watch URL."""
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3674
3675
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the ytuser: pseudo-URL onto the real user page."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3689
3690
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special playlist "LL".
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3708
3709
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video entries for query via the innertube search API,
        following continuation tokens across result pages."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # The token lives in its own continuationItemRenderer entry,
                # which has no itemSectionRenderer, so it must be extracted
                # *before* the itemSectionRenderer check below skips the
                # entry. (Previously this lookup happened after the skip and
                # was never reached, so pagination stopped after one page.)
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)
                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3790
3791
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same extractor as youtube:search, but with a search filter applied.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Opaque innertube 'params' value; presumably the sort-by-upload-date
    # filter, matching IE_DESC - TODO confirm against the API.
    _SEARCH_PARAMS = 'CAI%3D'
3797
3798
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the query embedded in a /results URL through the search IE."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees one of search_query/q is present
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3824
3825
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Common base for the authenticated feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so log in up front.
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3846
3847
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The watch-later list is exposed as the special playlist "WL".
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3860
3861
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com front page (see the last test below)
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3876
3877
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Delegates extraction to YoutubeFeedsInfoExtractor via _FEED_NAME
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3889
3890
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Delegates extraction to YoutubeFeedsInfoExtractor via _FEED_NAME
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3899
3900
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that carry query parameters but no
    # video id; exists only to raise a helpful error message.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted without a video id; tell the user how to
        # fix the command line instead.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3948
3949
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than the required 11
    # characters; exists only to raise a helpful error message.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)
3965
3966
3967 # Do Youtube show urls even exist anymore? I couldn't find any
3968 r'''
3969 class YoutubeShowIE(YoutubeTabIE):
3970 IE_DESC = 'YouTube.com (multi-season) shows'
3971 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3972 IE_NAME = 'youtube:show'
3973 _TESTS = [{
3974 'url': 'https://www.youtube.com/show/airdisasters',
3975 'playlist_mincount': 5,
3976 'info_dict': {
3977 'id': 'airdisasters',
3978 'title': 'Air Disasters',
3979 }
3980 }]
3981
3982 def _real_extract(self, url):
3983 playlist_id = self._match_id(url)
3984 return super(YoutubeShowIE, self)._real_extract(
3985 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3986 '''