# Source: youtube_dlc/extractor/youtube.py (yt-dlp / youtube-dlc)
# Commit: [youtube_live_chat] Fix URL
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 ExtractorError,
34 format_field,
35 float_or_none,
36 get_element_by_id,
37 int_or_none,
38 mimetype2ext,
39 parse_codecs,
40 parse_count,
41 parse_duration,
42 remove_quotes,
43 remove_start,
44 smuggle_url,
45 str_or_none,
46 str_to_int,
47 try_get,
48 unescapeHTML,
49 unified_strdate,
50 unsmuggle_url,
51 update_url_query,
52 uppercase_escape,
53 url_or_none,
54 urlencode_postdata,
55 urljoin,
56 )
57
58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of Google's (undocumented) sign-in RPC used by _login().
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path components that can never be video/channel identifiers.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs by their known prefixes, plus the special
    # mix/watch-later/liked lists (RDMM, WL, LL, LM).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Force the English YouTube UI by setting the PREF cookie."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result entries for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        # Hidden form inputs carry the CSRF/session tokens required by the
        # sign-in RPC below.
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow; f_req is a positional JSON
            # array whose element order is protocol-significant.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Response is prefixed with an anti-XSSI guard; strip
                # everything before the first '[' so it parses as JSON.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # The opaque account token needed for the password/TFA steps.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        # A non-empty entry here means the password step itself failed.
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # NOTE(review): due to conditional-expression precedence the
            # 'Unable to login: ' prefix is only shown for
            # INCORRECT_ANSWER_ENTERED; otherwise the raw message is printed.
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token ties the TFA submission to this challenge.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes pasted from SMS may carry a 'G-' prefix.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # NOTE(review): same precedence quirk as the login warning
                    # above - the prefix only applies to the first branch.
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge type cannot be solved programmatically;
                # point the user at a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Visiting CheckCookie finalizes the session cookies; a successful
        # login redirects through myaccount.google.com.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Copy the query dict so callers' mappings are never mutated.
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt login before extraction."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    # Innertube context sent with every /youtubei/v1 request (web client).
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # Regexes locating the JSON blobs embedded in watch/browse pages.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST to the Innertube endpoint *ep* and return the parsed JSON.

        *query* is merged over the default web-client context; the static
        'key' query parameter is the one used by the YouTube web frontend.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Parse the ytInitialData JSON object embedded in *webpage*.

        Tries the boundary-delimited form first, then the bare assignment.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the ytcfg.set(...) config object; returns {} if absent."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result dict from a videoRenderer object.

        All fields are extracted defensively with try_get, so missing keys
        simply yield None values.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Strip whitespace/thousand separators before parsing the leading digits.
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
356
357
358 class YoutubeIE(YoutubeBaseInfoExtractor):
359 IE_DESC = 'YouTube.com'
360 _VALID_URL = r"""(?x)^
361 (
362 (?:https?://|//) # http(s):// or protocol-independent URL
363 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
364 (?:www\.)?deturl\.com/www\.youtube\.com/|
365 (?:www\.)?pwnyoutube\.com/|
366 (?:www\.)?hooktube\.com/|
367 (?:www\.)?yourepeat\.com/|
368 tube\.majestyc\.net/|
369 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
370 (?:(?:www|dev)\.)?invidio\.us/|
371 (?:(?:www|no)\.)?invidiou\.sh/|
372 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
373 (?:www\.)?invidious\.kabi\.tk/|
374 (?:www\.)?invidious\.13ad\.de/|
375 (?:www\.)?invidious\.mastodon\.host/|
376 (?:www\.)?invidious\.zapashcanon\.fr/|
377 (?:www\.)?invidious\.kavin\.rocks/|
378 (?:www\.)?invidious\.tube/|
379 (?:www\.)?invidiou\.site/|
380 (?:www\.)?invidious\.site/|
381 (?:www\.)?invidious\.xyz/|
382 (?:www\.)?invidious\.nixnet\.xyz/|
383 (?:www\.)?invidious\.drycat\.fr/|
384 (?:www\.)?tube\.poal\.co/|
385 (?:www\.)?tube\.connect\.cafe/|
386 (?:www\.)?vid\.wxzm\.sx/|
387 (?:www\.)?vid\.mint\.lgbt/|
388 (?:www\.)?yewtu\.be/|
389 (?:www\.)?yt\.elukerio\.org/|
390 (?:www\.)?yt\.lelux\.fi/|
391 (?:www\.)?invidious\.ggc-project\.de/|
392 (?:www\.)?yt\.maisputain\.ovh/|
393 (?:www\.)?invidious\.13ad\.de/|
394 (?:www\.)?invidious\.toot\.koeln/|
395 (?:www\.)?invidious\.fdn\.fr/|
396 (?:www\.)?watch\.nettohikari\.com/|
397 (?:www\.)?kgg2m7yk5aybusll\.onion/|
398 (?:www\.)?qklhadlycap4cnod\.onion/|
399 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
400 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
401 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
402 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
403 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
404 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
405 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
406 (?:.*?\#/)? # handle anchor (#/) redirect urls
407 (?: # the various things that can precede the ID:
408 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
409 |(?: # or the v= param in all its forms
410 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
411 (?:\?|\#!?) # the params delimiter ? or # or #!
412 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
413 v=
414 )
415 ))
416 |(?:
417 youtu\.be| # just youtu.be/xxxx
418 vid\.plus| # or vid.plus/xxxx
419 zwearz\.com/watch| # or zwearz.com/watch/xxxx
420 )/
421 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
422 )
423 )? # all until now is optional -> you can pass the naked ID
424 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
425 (?!.*?\blist=
426 (?:
427 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
428 WL # WL are handled by the watch later IE
429 )
430 )
431 (?(1).+)? # if we found the ID, everything can follow
432 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
433 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
434 _PLAYER_INFO_RE = (
435 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
436 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
437 )
438 _formats = {
439 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
440 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
441 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
442 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
443 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
444 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
445 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
446 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
447 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
448 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
449 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
450 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
451 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
452 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
453 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
454 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
455 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
457
458
459 # 3D videos
460 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
461 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
462 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
463 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
464 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
465 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
466 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
467
468 # Apple HTTP Live Streaming
469 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
470 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
471 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
472 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
473 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
474 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
475 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
476 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
477
478 # DASH mp4 video
479 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
482 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
485 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
487 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
488 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
489 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
490 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
491
492 # Dash mp4 audio
493 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
494 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
495 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
496 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
497 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
498 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
499 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
500
501 # Dash webm
502 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
504 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
505 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
506 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
507 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
508 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
509 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
517 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
518 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
520 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
521 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
522 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
524
525 # Dash webm audio
526 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
527 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
528
529 # Dash webm audio with opus inside
530 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
531 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
532 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
533
534 # RTMP (unnamed)
535 '_rtmp': {'protocol': 'rtmp'},
536
537 # av01 video only formats sometimes served with "unknown" codecs
538 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
539 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
540 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
541 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
542 }
543 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
544
545 _GEO_BYPASS = False
546
547 IE_NAME = 'youtube'
548 _TESTS = [
549 {
550 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
551 'info_dict': {
552 'id': 'BaW_jenozKc',
553 'ext': 'mp4',
554 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
555 'uploader': 'Philipp Hagemeister',
556 'uploader_id': 'phihag',
557 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
558 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
559 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
560 'upload_date': '20121002',
561 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
562 'categories': ['Science & Technology'],
563 'tags': ['youtube-dl'],
564 'duration': 10,
565 'view_count': int,
566 'like_count': int,
567 'dislike_count': int,
568 'start_time': 1,
569 'end_time': 9,
570 }
571 },
572 {
573 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
574 'note': 'Embed-only video (#1746)',
575 'info_dict': {
576 'id': 'yZIXLfi8CZQ',
577 'ext': 'mp4',
578 'upload_date': '20120608',
579 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
580 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
581 'uploader': 'SET India',
582 'uploader_id': 'setindia',
583 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
584 'age_limit': 18,
585 }
586 },
587 {
588 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
589 'note': 'Use the first video ID in the URL',
590 'info_dict': {
591 'id': 'BaW_jenozKc',
592 'ext': 'mp4',
593 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
594 'uploader': 'Philipp Hagemeister',
595 'uploader_id': 'phihag',
596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
597 'upload_date': '20121002',
598 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
599 'categories': ['Science & Technology'],
600 'tags': ['youtube-dl'],
601 'duration': 10,
602 'view_count': int,
603 'like_count': int,
604 'dislike_count': int,
605 },
606 'params': {
607 'skip_download': True,
608 },
609 },
610 {
611 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
612 'note': '256k DASH audio (format 141) via DASH manifest',
613 'info_dict': {
614 'id': 'a9LDPn-MO4I',
615 'ext': 'm4a',
616 'upload_date': '20121002',
617 'uploader_id': '8KVIDEO',
618 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
619 'description': '',
620 'uploader': '8KVIDEO',
621 'title': 'UHDTV TEST 8K VIDEO.mp4'
622 },
623 'params': {
624 'youtube_include_dash_manifest': True,
625 'format': '141',
626 },
627 'skip': 'format 141 not served anymore',
628 },
629 # DASH manifest with encrypted signature
630 {
631 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
632 'info_dict': {
633 'id': 'IB3lcPjvWLA',
634 'ext': 'm4a',
635 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
636 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
637 'duration': 244,
638 'uploader': 'AfrojackVEVO',
639 'uploader_id': 'AfrojackVEVO',
640 'upload_date': '20131011',
641 },
642 'params': {
643 'youtube_include_dash_manifest': True,
644 'format': '141/bestaudio[ext=m4a]',
645 },
646 },
647 # Controversy video
648 {
649 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
650 'info_dict': {
651 'id': 'T4XJQO3qol8',
652 'ext': 'mp4',
653 'duration': 219,
654 'upload_date': '20100909',
655 'uploader': 'Amazing Atheist',
656 'uploader_id': 'TheAmazingAtheist',
657 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
658 'title': 'Burning Everyone\'s Koran',
659 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
660 }
661 },
662 # Normal age-gate video (embed allowed)
663 {
664 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
665 'info_dict': {
666 'id': 'HtVdAasjOgU',
667 'ext': 'mp4',
668 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
669 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
670 'duration': 142,
671 'uploader': 'The Witcher',
672 'uploader_id': 'WitcherGame',
673 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
674 'upload_date': '20140605',
675 'age_limit': 18,
676 },
677 },
678 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
679 # YouTube Red ad is not captured for creator
680 {
681 'url': '__2ABJjxzNo',
682 'info_dict': {
683 'id': '__2ABJjxzNo',
684 'ext': 'mp4',
685 'duration': 266,
686 'upload_date': '20100430',
687 'uploader_id': 'deadmau5',
688 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
689 'creator': 'Dada Life, deadmau5',
690 'description': 'md5:12c56784b8032162bb936a5f76d55360',
691 'uploader': 'deadmau5',
692 'title': 'Deadmau5 - Some Chords (HD)',
693 'alt_title': 'This Machine Kills Some Chords',
694 },
695 'expected_warnings': [
696 'DASH manifest missing',
697 ]
698 },
699 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
700 {
701 'url': 'lqQg6PlCWgI',
702 'info_dict': {
703 'id': 'lqQg6PlCWgI',
704 'ext': 'mp4',
705 'duration': 6085,
706 'upload_date': '20150827',
707 'uploader_id': 'olympic',
708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
709 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
710 'uploader': 'Olympic',
711 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
712 },
713 'params': {
714 'skip_download': 'requires avconv',
715 }
716 },
717 # Non-square pixels
718 {
719 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
720 'info_dict': {
721 'id': '_b-2C3KPAM0',
722 'ext': 'mp4',
723 'stretched_ratio': 16 / 9.,
724 'duration': 85,
725 'upload_date': '20110310',
726 'uploader_id': 'AllenMeow',
727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
728 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
729 'uploader': '孫ᄋᄅ',
730 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
731 },
732 },
733 # url_encoded_fmt_stream_map is empty string
734 {
735 'url': 'qEJwOuvDf7I',
736 'info_dict': {
737 'id': 'qEJwOuvDf7I',
738 'ext': 'webm',
739 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
740 'description': '',
741 'upload_date': '20150404',
742 'uploader_id': 'spbelect',
743 'uploader': 'Наблюдатели Петербурга',
744 },
745 'params': {
746 'skip_download': 'requires avconv',
747 },
748 'skip': 'This live event has ended.',
749 },
750 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
751 {
752 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
753 'info_dict': {
754 'id': 'FIl7x6_3R5Y',
755 'ext': 'webm',
756 'title': 'md5:7b81415841e02ecd4313668cde88737a',
757 'description': 'md5:116377fd2963b81ec4ce64b542173306',
758 'duration': 220,
759 'upload_date': '20150625',
760 'uploader_id': 'dorappi2000',
761 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
762 'uploader': 'dorappi2000',
763 'formats': 'mincount:31',
764 },
765 'skip': 'not actual anymore',
766 },
767 # DASH manifest with segment_list
768 {
769 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
770 'md5': '8ce563a1d667b599d21064e982ab9e31',
771 'info_dict': {
772 'id': 'CsmdDsKjzN8',
773 'ext': 'mp4',
774 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
775 'uploader': 'Airtek',
776 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
777 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
778 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
779 },
780 'params': {
781 'youtube_include_dash_manifest': True,
782 'format': '135', # bestvideo
783 },
784 'skip': 'This live event has ended.',
785 },
786 {
787 # Multifeed videos (multiple cameras), URL is for Main Camera
788 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
789 'info_dict': {
790 'id': 'jqWvoWXjCVs',
791 'title': 'teamPGP: Rocket League Noob Stream',
792 'description': 'md5:dc7872fb300e143831327f1bae3af010',
793 },
794 'playlist': [{
795 'info_dict': {
796 'id': 'jqWvoWXjCVs',
797 'ext': 'mp4',
798 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
799 'description': 'md5:dc7872fb300e143831327f1bae3af010',
800 'duration': 7335,
801 'upload_date': '20150721',
802 'uploader': 'Beer Games Beer',
803 'uploader_id': 'beergamesbeer',
804 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
805 'license': 'Standard YouTube License',
806 },
807 }, {
808 'info_dict': {
809 'id': '6h8e8xoXJzg',
810 'ext': 'mp4',
811 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
812 'description': 'md5:dc7872fb300e143831327f1bae3af010',
813 'duration': 7337,
814 'upload_date': '20150721',
815 'uploader': 'Beer Games Beer',
816 'uploader_id': 'beergamesbeer',
817 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
818 'license': 'Standard YouTube License',
819 },
820 }, {
821 'info_dict': {
822 'id': 'PUOgX5z9xZw',
823 'ext': 'mp4',
824 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
825 'description': 'md5:dc7872fb300e143831327f1bae3af010',
826 'duration': 7337,
827 'upload_date': '20150721',
828 'uploader': 'Beer Games Beer',
829 'uploader_id': 'beergamesbeer',
830 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
831 'license': 'Standard YouTube License',
832 },
833 }, {
834 'info_dict': {
835 'id': 'teuwxikvS5k',
836 'ext': 'mp4',
837 'title': 'teamPGP: Rocket League Noob Stream (zim)',
838 'description': 'md5:dc7872fb300e143831327f1bae3af010',
839 'duration': 7334,
840 'upload_date': '20150721',
841 'uploader': 'Beer Games Beer',
842 'uploader_id': 'beergamesbeer',
843 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
844 'license': 'Standard YouTube License',
845 },
846 }],
847 'params': {
848 'skip_download': True,
849 },
850 'skip': 'This video is not available.',
851 },
852 {
853 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
854 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
855 'info_dict': {
856 'id': 'gVfLd0zydlo',
857 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
858 },
859 'playlist_count': 2,
860 'skip': 'Not multifeed anymore',
861 },
862 {
863 'url': 'https://vid.plus/FlRa-iH7PGw',
864 'only_matching': True,
865 },
866 {
867 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
868 'only_matching': True,
869 },
870 {
871 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
872 # Also tests cut-off URL expansion in video description (see
873 # https://github.com/ytdl-org/youtube-dl/issues/1892,
874 # https://github.com/ytdl-org/youtube-dl/issues/8164)
875 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
876 'info_dict': {
877 'id': 'lsguqyKfVQg',
878 'ext': 'mp4',
879 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
880 'alt_title': 'Dark Walk - Position Music',
881 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
882 'duration': 133,
883 'upload_date': '20151119',
884 'uploader_id': 'IronSoulElf',
885 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
886 'uploader': 'IronSoulElf',
887 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
888 'track': 'Dark Walk - Position Music',
889 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
890 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
891 },
892 'params': {
893 'skip_download': True,
894 },
895 },
896 {
897 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
898 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
899 'only_matching': True,
900 },
901 {
902 # Video with yt:stretch=17:0
903 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
904 'info_dict': {
905 'id': 'Q39EVAstoRM',
906 'ext': 'mp4',
907 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
908 'description': 'md5:ee18a25c350637c8faff806845bddee9',
909 'upload_date': '20151107',
910 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
911 'uploader': 'CH GAMER DROID',
912 },
913 'params': {
914 'skip_download': True,
915 },
916 'skip': 'This video does not exist.',
917 },
918 {
919 # Video licensed under Creative Commons
920 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
921 'info_dict': {
922 'id': 'M4gD1WSo5mA',
923 'ext': 'mp4',
924 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
925 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
926 'duration': 721,
927 'upload_date': '20150127',
928 'uploader_id': 'BerkmanCenter',
929 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
930 'uploader': 'The Berkman Klein Center for Internet & Society',
931 'license': 'Creative Commons Attribution license (reuse allowed)',
932 },
933 'params': {
934 'skip_download': True,
935 },
936 },
937 {
938 # Channel-like uploader_url
939 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
940 'info_dict': {
941 'id': 'eQcmzGIKrzg',
942 'ext': 'mp4',
943 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
944 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
945 'duration': 4060,
946 'upload_date': '20151119',
947 'uploader': 'Bernie Sanders',
948 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
949 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
950 'license': 'Creative Commons Attribution license (reuse allowed)',
951 },
952 'params': {
953 'skip_download': True,
954 },
955 },
956 {
957 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
958 'only_matching': True,
959 },
960 {
961 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
962 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
963 'only_matching': True,
964 },
965 {
966 # Rental video preview
967 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
968 'info_dict': {
969 'id': 'uGpuVWrhIzE',
970 'ext': 'mp4',
971 'title': 'Piku - Trailer',
972 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
973 'upload_date': '20150811',
974 'uploader': 'FlixMatrix',
975 'uploader_id': 'FlixMatrixKaravan',
976 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
977 'license': 'Standard YouTube License',
978 },
979 'params': {
980 'skip_download': True,
981 },
982 'skip': 'This video is not available.',
983 },
984 {
985 # YouTube Red video with episode data
986 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
987 'info_dict': {
988 'id': 'iqKdEhx-dD4',
989 'ext': 'mp4',
990 'title': 'Isolation - Mind Field (Ep 1)',
991 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
992 'duration': 2085,
993 'upload_date': '20170118',
994 'uploader': 'Vsauce',
995 'uploader_id': 'Vsauce',
996 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
997 'series': 'Mind Field',
998 'season_number': 1,
999 'episode_number': 1,
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
1004 'expected_warnings': [
1005 'Skipping DASH manifest',
1006 ],
1007 },
1008 {
1009 # The following content has been identified by the YouTube community
1010 # as inappropriate or offensive to some audiences.
1011 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1012 'info_dict': {
1013 'id': '6SJNVb0GnPI',
1014 'ext': 'mp4',
1015 'title': 'Race Differences in Intelligence',
1016 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1017 'duration': 965,
1018 'upload_date': '20140124',
1019 'uploader': 'New Century Foundation',
1020 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1021 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1022 },
1023 'params': {
1024 'skip_download': True,
1025 },
1026 },
1027 {
1028 # itag 212
1029 'url': '1t24XAntNCY',
1030 'only_matching': True,
1031 },
1032 {
1033 # geo restricted to JP
1034 'url': 'sJL6WA-aGkQ',
1035 'only_matching': True,
1036 },
1037 {
1038 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1039 'only_matching': True,
1040 },
1041 {
1042 # DRM protected
1043 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1044 'only_matching': True,
1045 },
1046 {
1047 # Video with unsupported adaptive stream type formats
1048 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1049 'info_dict': {
1050 'id': 'Z4Vy8R84T1U',
1051 'ext': 'mp4',
1052 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1053 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1054 'duration': 433,
1055 'upload_date': '20130923',
1056 'uploader': 'Amelia Putri Harwita',
1057 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1058 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1059 'formats': 'maxcount:10',
1060 },
1061 'params': {
1062 'skip_download': True,
1063 'youtube_include_dash_manifest': False,
1064 },
1065 'skip': 'not actual anymore',
1066 },
1067 {
1068 # Youtube Music Auto-generated description
1069 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1070 'info_dict': {
1071 'id': 'MgNrAu2pzNs',
1072 'ext': 'mp4',
1073 'title': 'Voyeur Girl',
1074 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1075 'upload_date': '20190312',
1076 'uploader': 'Stephen - Topic',
1077 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1078 'artist': 'Stephen',
1079 'track': 'Voyeur Girl',
1080 'album': 'it\'s too much love to know my dear',
1081 'release_date': '20190313',
1082 'release_year': 2019,
1083 },
1084 'params': {
1085 'skip_download': True,
1086 },
1087 },
1088 {
1089 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1090 'only_matching': True,
1091 },
1092 {
1093 # invalid -> valid video id redirection
1094 'url': 'DJztXj2GPfl',
1095 'info_dict': {
1096 'id': 'DJztXj2GPfk',
1097 'ext': 'mp4',
1098 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1099 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1100 'upload_date': '20090125',
1101 'uploader': 'Prochorowka',
1102 'uploader_id': 'Prochorowka',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1104 'artist': 'Panjabi MC',
1105 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1106 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1107 },
1108 'params': {
1109 'skip_download': True,
1110 },
1111 },
1112 {
1113 # empty description results in an empty string
1114 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1115 'info_dict': {
1116 'id': 'x41yOUIvK2k',
1117 'ext': 'mp4',
1118 'title': 'IMG 3456',
1119 'description': '',
1120 'upload_date': '20170613',
1121 'uploader_id': 'ElevageOrVert',
1122 'uploader': 'ElevageOrVert',
1123 },
1124 'params': {
1125 'skip_download': True,
1126 },
1127 },
1128 {
1129 # with '};' inside yt initial data (see [1])
1130 # see [2] for an example with '};' inside ytInitialPlayerResponse
1131 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1132 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1133 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1134 'info_dict': {
1135 'id': 'CHqg6qOn4no',
1136 'ext': 'mp4',
1137 'title': 'Part 77 Sort a list of simple types in c#',
1138 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1139 'upload_date': '20130831',
1140 'uploader_id': 'kudvenkat',
1141 'uploader': 'kudvenkat',
1142 },
1143 'params': {
1144 'skip_download': True,
1145 },
1146 },
1147 {
1148 # another example of '};' in ytInitialData
1149 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1150 'only_matching': True,
1151 },
1152 {
1153 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1154 'only_matching': True,
1155 },
1156 ]
1157
1158 def __init__(self, *args, **kwargs):
1159 super(YoutubeIE, self).__init__(*args, **kwargs)
1160 self._player_cache = {}
1161
1162 def report_video_info_webpage_download(self, video_id):
1163 """Report attempt to download video info webpage."""
1164 self.to_screen('%s: Downloading video info webpage' % video_id)
1165
1166 def report_information_extraction(self, video_id):
1167 """Report attempt to extract video information."""
1168 self.to_screen('%s: Extracting video information' % video_id)
1169
1170 def report_unavailable_format(self, video_id, format):
1171 """Report extracted video URL."""
1172 self.to_screen('%s: Format %s not available' % (video_id, format))
1173
1174 def report_rtmp_download(self):
1175 """Indicate the download will use the RTMP protocol."""
1176 self.to_screen('RTMP download detected')
1177
1178 def _signature_cache_id(self, example_sig):
1179 """ Return a string representation of a signature """
1180 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1181
1182 @classmethod
1183 def _extract_player_info(cls, player_url):
1184 for player_re in cls._PLAYER_INFO_RE:
1185 id_m = re.search(player_re, player_url)
1186 if id_m:
1187 break
1188 else:
1189 raise ExtractorError('Cannot identify player %r' % player_url)
1190 return id_m.group('ext'), id_m.group('id')
1191
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (and cache) the signature-decryption function for a player.

        First looks up a previously derived permutation spec in the
        filesystem cache; otherwise downloads the JS or SWF player,
        extracts the decipher routine, probes it with a test string to
        learn the character permutation, and stores that spec.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        # The cache key includes the signature's length pattern because the
        # player applies different transforms depending on input layout.
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id  # guard against path separators in the cache id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices; re-applying it
            # reproduces the decryption without re-downloading the player.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Probe the extracted function with a string of distinct characters
        # to learn which source index each output position comes from; this
        # assumes the algorithm only rearranges/drops characters.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1231
    def _print_sig_code(self, func, example_sig):
        """Print Python source that reproduces the signature permutation.

        Probes *func* with a distinct-character test string, then renders
        the resulting index sequence as compact slice expressions. Used for
        debugging via the youtube_print_sig_code option.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting parts that match
                # Python's slicing defaults for brevity.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, coalescing runs with stride +/-1
            # into slices and emitting isolated indices as s[i].
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a constant-stride run: extend or close it.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Adjacent indices start a new +1/-1 run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the still-open run.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1270
1271 def _parse_sig_js(self, jscode):
1272 funcname = self._search_regex(
1273 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1274 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1275 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1276 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1277 # Obsolete patterns
1278 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1279 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1280 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1281 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1282 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1283 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1284 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1285 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1286 jscode, 'Initial JS player signature function name', group='sig')
1287
1288 jsi = JSInterpreter(jscode)
1289 initial_function = jsi.extract_function(funcname)
1290 return lambda s: initial_function([s])
1291
1292 def _parse_sig_swf(self, file_contents):
1293 swfi = SWFInterpreter(file_contents)
1294 TARGET_CLASSNAME = 'SignatureDecipher'
1295 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1296 initial_function = swfi.extract_function(searched_class, 'decipher')
1297 return lambda s: initial_function([s])
1298
1299 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1300 """Turn the encrypted s field into a working signature"""
1301
1302 if player_url is None:
1303 raise ExtractorError('Cannot decrypt signature without player_url')
1304
1305 if player_url.startswith('//'):
1306 player_url = 'https:' + player_url
1307 elif not re.match(r'https?://', player_url):
1308 player_url = compat_urlparse.urljoin(
1309 'https://www.youtube.com', player_url)
1310 try:
1311 player_id = (player_url, self._signature_cache_id(s))
1312 if player_id not in self._player_cache:
1313 func = self._extract_signature_function(
1314 video_id, player_url, s
1315 )
1316 self._player_cache[player_id] = func
1317 func = self._player_cache[player_id]
1318 if self._downloader.params.get('youtube_print_sig_code'):
1319 self._print_sig_code(func, s)
1320 return func(s)
1321 except Exception as e:
1322 tb = traceback.format_exc()
1323 raise ExtractorError(
1324 'Signature extraction failed: ' + tb, cause=e)
1325
    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
        """Return available manual subtitles as {lang: [format dicts]}.

        Queries the legacy timedtext track-list endpoint; additionally
        appends a pseudo 'live_chat' track when a live chat replay is
        available.
        """
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Best-effort: missing subtitles only produce a warning.
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # Keep only the first track per language.
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if has_live_chat_replay:
            # NOTE(review): this entry carries 'video_id' but no 'url'; the
            # youtube_live_chat_replay protocol downloader presumably derives
            # the URL from 'video_id' itself — confirm against that downloader.
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
1365
1366 def _get_ytplayer_config(self, video_id, webpage):
1367 patterns = (
1368 # User data may contain arbitrary character sequences that may affect
1369 # JSON extraction with regex, e.g. when '};' is contained the second
1370 # regex won't capture the whole JSON. Yet working around by trying more
1371 # concrete regex first keeping in mind proper quoted string handling
1372 # to be implemented in future that will replace this workaround (see
1373 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1374 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1375 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1376 r';ytplayer\.config\s*=\s*({.+?});',
1377 )
1378 config = self._search_regex(
1379 patterns, webpage, 'ytplayer.config', default=None)
1380 if config:
1381 return self._parse_json(
1382 uppercase_escape(config), video_id, fatal=False)
1383
    def _get_automatic_captions(self, video_id, player_response, player_config):
        """Return automatic captions as {lang: [format dicts]}.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process. Tries, in order: the post-2017
        player_response captionTracks data, the legacy ttsurl flow, and the
        even older caption_tracks args. Any lookup failure degrades to a
        warning and an empty dict.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not (player_response or player_config):
            self._downloader.report_warning(err_msg)
            return {}
        # The whole extraction is wrapped in one try: any missing key/index
        # (or a nested ExtractorError) means "no automatic captions".
        try:
            args = player_config.get('args') if player_config else {}
            caption_url = args.get('ttsurl')
            if caption_url:
                # Legacy flow: ttsurl + timestamp from ytplayer.config args.
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the base
                # caption URL's query string for each language/format pair.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1480
1481 def _mark_watched(self, video_id, video_info, player_response):
1482 playback_url = url_or_none(try_get(
1483 player_response,
1484 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1485 video_info, lambda x: x['videostats_playback_base_url'][0]))
1486 if not playback_url:
1487 return
1488 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1489 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1490
1491 # cpn generation algorithm is reverse engineered from base.js.
1492 # In fact it works even with dummy cpn.
1493 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1494 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1495
1496 qs.update({
1497 'ver': ['2'],
1498 'cpn': [cpn],
1499 })
1500 playback_url = compat_urlparse.urlunparse(
1501 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1502
1503 self._download_webpage(
1504 playback_url, video_id, 'Marking watched',
1505 'Unable to mark watched', fatal=False)
1506
1507 @staticmethod
1508 def _extract_urls(webpage):
1509 # Embedded YouTube player
1510 entries = [
1511 unescapeHTML(mobj.group('url'))
1512 for mobj in re.finditer(r'''(?x)
1513 (?:
1514 <iframe[^>]+?src=|
1515 data-video-url=|
1516 <embed[^>]+?src=|
1517 embedSWF\(?:\s*|
1518 <object[^>]+data=|
1519 new\s+SWFObject\(
1520 )
1521 (["\'])
1522 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1523 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1524 \1''', webpage)]
1525
1526 # lazyYT YouTube embed
1527 entries.extend(list(map(
1528 unescapeHTML,
1529 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1530
1531 # Wordpress "YouTube Video Importer" plugin
1532 matches = re.findall(r'''(?x)<div[^>]+
1533 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1534 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1535 entries.extend(m[-1] for m in matches)
1536
1537 return entries
1538
1539 @staticmethod
1540 def _extract_url(webpage):
1541 urls = YoutubeIE._extract_urls(webpage)
1542 return urls[0] if urls else None
1543
1544 @classmethod
1545 def extract_id(cls, url):
1546 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1547 if mobj is None:
1548 raise ExtractorError('Invalid URL: %s' % url)
1549 video_id = mobj.group(2)
1550 return video_id
1551
1552 def _extract_chapters_from_json(self, webpage, video_id, duration):
1553 if not webpage:
1554 return
1555 data = self._extract_yt_initial_data(video_id, webpage)
1556 if not data or not isinstance(data, dict):
1557 return
1558 chapters_list = try_get(
1559 data,
1560 lambda x: x['playerOverlays']
1561 ['playerOverlayRenderer']
1562 ['decoratedPlayerBarRenderer']
1563 ['decoratedPlayerBarRenderer']
1564 ['playerBar']
1565 ['chapteredPlayerBarRenderer']
1566 ['chapters'],
1567 list)
1568 if not chapters_list:
1569 return
1570
1571 def chapter_time(chapter):
1572 return float_or_none(
1573 try_get(
1574 chapter,
1575 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1576 int),
1577 scale=1000)
1578 chapters = []
1579 for next_num, chapter in enumerate(chapters_list, start=1):
1580 start_time = chapter_time(chapter)
1581 if start_time is None:
1582 continue
1583 end_time = (chapter_time(chapters_list[next_num])
1584 if next_num < len(chapters_list) else duration)
1585 if end_time is None:
1586 continue
1587 title = try_get(
1588 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1589 compat_str)
1590 chapters.append({
1591 'start_time': start_time,
1592 'end_time': end_time,
1593 'title': title,
1594 })
1595 return chapters
1596
1597 @staticmethod
1598 def _extract_chapters_from_description(description, duration):
1599 if not description:
1600 return None
1601 chapter_lines = re.findall(
1602 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1603 description)
1604 if not chapter_lines:
1605 return None
1606 chapters = []
1607 for next_num, (chapter_line, time_point) in enumerate(
1608 chapter_lines, start=1):
1609 start_time = parse_duration(time_point)
1610 if start_time is None:
1611 continue
1612 if start_time > duration:
1613 break
1614 end_time = (duration if next_num == len(chapter_lines)
1615 else parse_duration(chapter_lines[next_num][1]))
1616 if end_time is None:
1617 continue
1618 if end_time > duration:
1619 end_time = duration
1620 if start_time > end_time:
1621 break
1622 chapter_title = re.sub(
1623 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1624 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1625 chapters.append({
1626 'start_time': start_time,
1627 'end_time': end_time,
1628 'title': chapter_title,
1629 })
1630 return chapters
1631
1632 def _extract_chapters(self, webpage, description, video_id, duration):
1633 return (self._extract_chapters_from_json(webpage, video_id, duration)
1634 or self._extract_chapters_from_description(description, duration))
1635
1636 def _real_extract(self, url):
1637 url, smuggled_data = unsmuggle_url(url, {})
1638
1639 proto = (
1640 'http' if self._downloader.params.get('prefer_insecure', False)
1641 else 'https')
1642
1643 start_time = None
1644 end_time = None
1645 parsed_url = compat_urllib_parse_urlparse(url)
1646 for component in [parsed_url.fragment, parsed_url.query]:
1647 query = compat_parse_qs(component)
1648 if start_time is None and 't' in query:
1649 start_time = parse_duration(query['t'][0])
1650 if start_time is None and 'start' in query:
1651 start_time = parse_duration(query['start'][0])
1652 if end_time is None and 'end' in query:
1653 end_time = parse_duration(query['end'][0])
1654
1655 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1656 mobj = re.search(self._NEXT_URL_RE, url)
1657 if mobj:
1658 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1659 video_id = self.extract_id(url)
1660
1661 # Get video webpage
1662 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1663 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1664
1665 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1666 video_id = qs.get('v', [None])[0] or video_id
1667
1668 # Attempt to extract SWF player URL
1669 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1670 if mobj is not None:
1671 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1672 else:
1673 player_url = None
1674
1675 dash_mpds = []
1676
1677 def add_dash_mpd(video_info):
1678 dash_mpd = video_info.get('dashmpd')
1679 if dash_mpd and dash_mpd[0] not in dash_mpds:
1680 dash_mpds.append(dash_mpd[0])
1681
1682 def add_dash_mpd_pr(pl_response):
1683 dash_mpd = url_or_none(try_get(
1684 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1685 compat_str))
1686 if dash_mpd and dash_mpd not in dash_mpds:
1687 dash_mpds.append(dash_mpd)
1688
1689 is_live = None
1690 view_count = None
1691
1692 def extract_view_count(v_info):
1693 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1694
1695 def extract_player_response(player_response, video_id):
1696 pl_response = str_or_none(player_response)
1697 if not pl_response:
1698 return
1699 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1700 if isinstance(pl_response, dict):
1701 add_dash_mpd_pr(pl_response)
1702 return pl_response
1703
1704 def extract_embedded_config(embed_webpage, video_id):
1705 embedded_config = self._search_regex(
1706 r'setConfig\(({.*})\);',
1707 embed_webpage, 'ytInitialData', default=None)
1708 if embedded_config:
1709 return embedded_config
1710
1711 video_info = {}
1712 player_response = {}
1713 ytplayer_config = None
1714 embed_webpage = None
1715
1716 # Get video info
1717 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1718 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1719 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1720 age_gate = True
1721 # We simulate the access to the video from www.youtube.com/v/{video_id}
1722 # this can be viewed without login into Youtube
1723 url = proto + '://www.youtube.com/embed/%s' % video_id
1724 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1725 ext = extract_embedded_config(embed_webpage, video_id)
1726 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1727 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1728 if not playable_in_embed:
1729 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1730 playable_in_embed = ''
1731 else:
1732 playable_in_embed = playable_in_embed.group('playableinEmbed')
1733 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1734 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1735 if playable_in_embed == 'false':
1736 '''
1737 # TODO apply this patch when Support for Python 2.6(!) and above drops
1738 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1739 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1740 '''
1741 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1742 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1743 age_gate = False
1744 # Try looking directly into the video webpage
1745 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1746 if ytplayer_config:
1747 args = ytplayer_config.get("args")
1748 if args is not None:
1749 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1750 # Convert to the same format returned by compat_parse_qs
1751 video_info = dict((k, [v]) for k, v in args.items())
1752 add_dash_mpd(video_info)
1753 # Rental video is not rented but preview is available (e.g.
1754 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1755 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1756 if not video_info and args.get('ypc_vid'):
1757 return self.url_result(
1758 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1759 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1760 is_live = True
1761 if not player_response:
1762 player_response = extract_player_response(args.get('player_response'), video_id)
1763 elif not player_response:
1764 player_response = ytplayer_config
1765 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1766 add_dash_mpd_pr(player_response)
1767 else:
1768 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1769 else:
1770 data = compat_urllib_parse_urlencode({
1771 'video_id': video_id,
1772 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1773 'sts': self._search_regex(
1774 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1775 })
1776 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1777 try:
1778 video_info_webpage = self._download_webpage(
1779 video_info_url, video_id,
1780 note='Refetching age-gated info webpage',
1781 errnote='unable to download video info webpage')
1782 except ExtractorError:
1783 video_info_webpage = None
1784 if video_info_webpage:
1785 video_info = compat_parse_qs(video_info_webpage)
1786 pl_response = video_info.get('player_response', [None])[0]
1787 player_response = extract_player_response(pl_response, video_id)
1788 add_dash_mpd(video_info)
1789 view_count = extract_view_count(video_info)
1790 else:
1791 age_gate = False
1792 # Try looking directly into the video webpage
1793 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1794 if ytplayer_config:
1795 args = ytplayer_config.get('args', {})
1796 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1797 # Convert to the same format returned by compat_parse_qs
1798 video_info = dict((k, [v]) for k, v in args.items())
1799 add_dash_mpd(video_info)
1800 # Rental video is not rented but preview is available (e.g.
1801 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1802 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1803 if not video_info and args.get('ypc_vid'):
1804 return self.url_result(
1805 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1806 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1807 is_live = True
1808 if not player_response:
1809 player_response = extract_player_response(args.get('player_response'), video_id)
1810 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1811 add_dash_mpd_pr(player_response)
1812
1813 if not video_info and not player_response:
1814 player_response = extract_player_response(
1815 self._search_regex(
1816 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1817 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1818 'initial player response', default='{}'),
1819 video_id)
1820
1821 def extract_unavailable_message():
1822 messages = []
1823 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1824 msg = self._html_search_regex(
1825 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1826 video_webpage, 'unavailable %s' % kind, default=None)
1827 if msg:
1828 messages.append(msg)
1829 if messages:
1830 return '\n'.join(messages)
1831
1832 if not video_info and not player_response:
1833 unavailable_message = extract_unavailable_message()
1834 if not unavailable_message:
1835 unavailable_message = 'Unable to extract video data'
1836 raise ExtractorError(
1837 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1838
1839 if not isinstance(video_info, dict):
1840 video_info = {}
1841
1842 playable_in_embed = try_get(
1843 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1844
1845 video_details = try_get(
1846 player_response, lambda x: x['videoDetails'], dict) or {}
1847
1848 microformat = try_get(
1849 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1850
1851 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1852 if not video_title:
1853 self._downloader.report_warning('Unable to extract video title')
1854 video_title = '_'
1855
1856 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1857 if video_description:
1858
1859 def replace_url(m):
1860 redir_url = compat_urlparse.urljoin(url, m.group(1))
1861 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1862 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1863 qs = compat_parse_qs(parsed_redir_url.query)
1864 q = qs.get('q')
1865 if q and q[0]:
1866 return q[0]
1867 return redir_url
1868
1869 description_original = video_description = re.sub(r'''(?x)
1870 <a\s+
1871 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1872 (?:title|href)="([^"]+)"\s+
1873 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1874 class="[^"]*"[^>]*>
1875 [^<]+\.{3}\s*
1876 </a>
1877 ''', replace_url, video_description)
1878 video_description = clean_html(video_description)
1879 else:
1880 video_description = video_details.get('shortDescription')
1881 if video_description is None:
1882 video_description = self._html_search_meta('description', video_webpage)
1883
1884 if not smuggled_data.get('force_singlefeed', False):
1885 if not self._downloader.params.get('noplaylist'):
1886 multifeed_metadata_list = try_get(
1887 player_response,
1888 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1889 compat_str) or try_get(
1890 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1891 if multifeed_metadata_list:
1892 entries = []
1893 feed_ids = []
1894 for feed in multifeed_metadata_list.split(','):
1895 # Unquote should take place before split on comma (,) since textual
1896 # fields may contain comma as well (see
1897 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1898 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1899
                        def feed_entry(name):
                            # feed_data is parse_qs output, so each value is a
                            # single-element list; return the first value of
                            # `name` as a string, or None when absent.
                            return try_get(feed_data, lambda x: x[name][0], compat_str)
1902
1903 feed_id = feed_entry('id')
1904 if not feed_id:
1905 continue
1906 feed_title = feed_entry('title')
1907 title = video_title
1908 if feed_title:
1909 title += ' (%s)' % feed_title
1910 entries.append({
1911 '_type': 'url_transparent',
1912 'ie_key': 'Youtube',
1913 'url': smuggle_url(
1914 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1915 {'force_singlefeed': True}),
1916 'title': title,
1917 })
1918 feed_ids.append(feed_id)
1919 self.to_screen(
1920 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1921 % (', '.join(feed_ids), video_id))
1922 return self.playlist_result(entries, video_id, video_title, video_description)
1923 else:
1924 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1925
1926 if view_count is None:
1927 view_count = extract_view_count(video_info)
1928 if view_count is None and video_details:
1929 view_count = int_or_none(video_details.get('viewCount'))
1930 if view_count is None and microformat:
1931 view_count = int_or_none(microformat.get('viewCount'))
1932
1933 if is_live is None:
1934 is_live = bool_or_none(video_details.get('isLive'))
1935
1936 has_live_chat_replay = False
1937 if not is_live:
1938 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
1939 try:
1940 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1941 has_live_chat_replay = True
1942 except (KeyError, IndexError, TypeError):
1943 pass
1944
1945 # Check for "rental" videos
1946 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1947 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1948
1949 def _extract_filesize(media_url):
1950 return int_or_none(self._search_regex(
1951 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1952
1953 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1954 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1955
1956 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1957 self.report_rtmp_download()
1958 formats = [{
1959 'format_id': '_rtmp',
1960 'protocol': 'rtmp',
1961 'url': video_info['conn'][0],
1962 'player_url': player_url,
1963 }]
1964 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1965 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1966 if 'rtmpe%3Dyes' in encoded_url_map:
1967 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1968 formats = []
1969 formats_spec = {}
1970 fmt_list = video_info.get('fmt_list', [''])[0]
1971 if fmt_list:
1972 for fmt in fmt_list.split(','):
1973 spec = fmt.split('/')
1974 if len(spec) > 1:
1975 width_height = spec[1].split('x')
1976 if len(width_height) == 2:
1977 formats_spec[spec[0]] = {
1978 'resolution': spec[1],
1979 'width': int_or_none(width_height[0]),
1980 'height': int_or_none(width_height[1]),
1981 }
1982 for fmt in streaming_formats:
1983 itag = str_or_none(fmt.get('itag'))
1984 if not itag:
1985 continue
1986 quality = fmt.get('quality')
1987 quality_label = fmt.get('qualityLabel') or quality
1988 formats_spec[itag] = {
1989 'asr': int_or_none(fmt.get('audioSampleRate')),
1990 'filesize': int_or_none(fmt.get('contentLength')),
1991 'format_note': quality_label,
1992 'fps': int_or_none(fmt.get('fps')),
1993 'height': int_or_none(fmt.get('height')),
1994 # bitrate for itag 43 is always 2147483647
1995 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1996 'width': int_or_none(fmt.get('width')),
1997 }
1998
1999 for fmt in streaming_formats:
2000 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2001 continue
2002 url = url_or_none(fmt.get('url'))
2003
2004 if not url:
2005 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2006 if not cipher:
2007 continue
2008 url_data = compat_parse_qs(cipher)
2009 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2010 if not url:
2011 continue
2012 else:
2013 cipher = None
2014 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2015
2016 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2017 # Unsupported FORMAT_STREAM_TYPE_OTF
2018 if stream_type == 3:
2019 continue
2020
2021 format_id = fmt.get('itag') or url_data['itag'][0]
2022 if not format_id:
2023 continue
2024 format_id = compat_str(format_id)
2025
2026 if cipher:
2027 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2028 ASSETS_RE = (
2029 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2030 r'"jsUrl"\s*:\s*("[^"]+")',
2031 r'"assets":.+?"js":\s*("[^"]+")')
2032 jsplayer_url_json = self._search_regex(
2033 ASSETS_RE,
2034 embed_webpage if age_gate else video_webpage,
2035 'JS player URL (1)', default=None)
2036 if not jsplayer_url_json and not age_gate:
2037 # We need the embed website after all
2038 if embed_webpage is None:
2039 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2040 embed_webpage = self._download_webpage(
2041 embed_url, video_id, 'Downloading embed webpage')
2042 jsplayer_url_json = self._search_regex(
2043 ASSETS_RE, embed_webpage, 'JS player URL')
2044
2045 player_url = json.loads(jsplayer_url_json)
2046 if player_url is None:
2047 player_url_json = self._search_regex(
2048 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2049 video_webpage, 'age gate player URL')
2050 player_url = json.loads(player_url_json)
2051
2052 if 'sig' in url_data:
2053 url += '&signature=' + url_data['sig'][0]
2054 elif 's' in url_data:
2055 encrypted_sig = url_data['s'][0]
2056
2057 if self._downloader.params.get('verbose'):
2058 if player_url is None:
2059 player_desc = 'unknown'
2060 else:
2061 player_type, player_version = self._extract_player_info(player_url)
2062 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2063 parts_sizes = self._signature_cache_id(encrypted_sig)
2064 self.to_screen('{%s} signature length %s, %s' %
2065 (format_id, parts_sizes, player_desc))
2066
2067 signature = self._decrypt_signature(
2068 encrypted_sig, video_id, player_url, age_gate)
2069 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2070 url += '&%s=%s' % (sp, signature)
2071 if 'ratebypass' not in url:
2072 url += '&ratebypass=yes'
2073
2074 dct = {
2075 'format_id': format_id,
2076 'url': url,
2077 'player_url': player_url,
2078 }
2079 if format_id in self._formats:
2080 dct.update(self._formats[format_id])
2081 if format_id in formats_spec:
2082 dct.update(formats_spec[format_id])
2083
2084 # Some itags are not included in DASH manifest thus corresponding formats will
2085 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2086 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2087 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2088 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2089
2090 if width is None:
2091 width = int_or_none(fmt.get('width'))
2092 if height is None:
2093 height = int_or_none(fmt.get('height'))
2094
2095 filesize = int_or_none(url_data.get(
2096 'clen', [None])[0]) or _extract_filesize(url)
2097
2098 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2099 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2100
2101 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2102 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2103 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2104
2105 more_fields = {
2106 'filesize': filesize,
2107 'tbr': tbr,
2108 'width': width,
2109 'height': height,
2110 'fps': fps,
2111 'format_note': quality_label or quality,
2112 }
2113 for key, value in more_fields.items():
2114 if value:
2115 dct[key] = value
2116 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2117 if type_:
2118 type_split = type_.split(';')
2119 kind_ext = type_split[0].split('/')
2120 if len(kind_ext) == 2:
2121 kind, _ = kind_ext
2122 dct['ext'] = mimetype2ext(type_split[0])
2123 if kind in ('audio', 'video'):
2124 codecs = None
2125 for mobj in re.finditer(
2126 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2127 if mobj.group('key') == 'codecs':
2128 codecs = mobj.group('val')
2129 break
2130 if codecs:
2131 dct.update(parse_codecs(codecs))
2132 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2133 dct['downloader_options'] = {
2134 # Youtube throttles chunks >~10M
2135 'http_chunk_size': 10485760,
2136 }
2137 formats.append(dct)
2138 else:
2139 manifest_url = (
2140 url_or_none(try_get(
2141 player_response,
2142 lambda x: x['streamingData']['hlsManifestUrl'],
2143 compat_str))
2144 or url_or_none(try_get(
2145 video_info, lambda x: x['hlsvp'][0], compat_str)))
2146 if manifest_url:
2147 formats = []
2148 m3u8_formats = self._extract_m3u8_formats(
2149 manifest_url, video_id, 'mp4', fatal=False)
2150 for a_format in m3u8_formats:
2151 itag = self._search_regex(
2152 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2153 if itag:
2154 a_format['format_id'] = itag
2155 if itag in self._formats:
2156 dct = self._formats[itag].copy()
2157 dct.update(a_format)
2158 a_format = dct
2159 a_format['player_url'] = player_url
2160 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2161 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2162 if self._downloader.params.get('youtube_include_hls_manifest', True):
2163 formats.append(a_format)
2164 else:
2165 error_message = extract_unavailable_message()
2166 if not error_message:
2167 reason_list = try_get(
2168 player_response,
2169 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2170 list) or []
2171 for reason in reason_list:
2172 if not isinstance(reason, dict):
2173 continue
2174 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2175 if reason_text:
2176 if not error_message:
2177 error_message = ''
2178 error_message += reason_text
2179 if error_message:
2180 error_message = clean_html(error_message)
2181 if not error_message:
2182 error_message = clean_html(try_get(
2183 player_response, lambda x: x['playabilityStatus']['reason'],
2184 compat_str))
2185 if not error_message:
2186 error_message = clean_html(
2187 try_get(video_info, lambda x: x['reason'][0], compat_str))
2188 if error_message:
2189 raise ExtractorError(error_message, expected=True)
2190 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2191
2192 # uploader
2193 video_uploader = try_get(
2194 video_info, lambda x: x['author'][0],
2195 compat_str) or str_or_none(video_details.get('author'))
2196 if video_uploader:
2197 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2198 else:
2199 self._downloader.report_warning('unable to extract uploader name')
2200
2201 # uploader_id
2202 video_uploader_id = None
2203 video_uploader_url = None
2204 mobj = re.search(
2205 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2206 video_webpage)
2207 if mobj is not None:
2208 video_uploader_id = mobj.group('uploader_id')
2209 video_uploader_url = mobj.group('uploader_url')
2210 else:
2211 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2212 if owner_profile_url:
2213 video_uploader_id = self._search_regex(
2214 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2215 default=None)
2216 video_uploader_url = owner_profile_url
2217
2218 channel_id = (
2219 str_or_none(video_details.get('channelId'))
2220 or self._html_search_meta(
2221 'channelId', video_webpage, 'channel id', default=None)
2222 or self._search_regex(
2223 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2224 video_webpage, 'channel id', default=None, group='id'))
2225 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2226
2227 thumbnails = []
2228 thumbnails_list = try_get(
2229 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2230 for t in thumbnails_list:
2231 if not isinstance(t, dict):
2232 continue
2233 thumbnail_url = url_or_none(t.get('url'))
2234 if not thumbnail_url:
2235 continue
2236 thumbnails.append({
2237 'url': thumbnail_url,
2238 'width': int_or_none(t.get('width')),
2239 'height': int_or_none(t.get('height')),
2240 })
2241
2242 if not thumbnails:
2243 video_thumbnail = None
2244 # We try first to get a high quality image:
2245 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2246 video_webpage, re.DOTALL)
2247 if m_thumb is not None:
2248 video_thumbnail = m_thumb.group(1)
2249 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2250 if thumbnail_url:
2251 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2252 if video_thumbnail:
2253 thumbnails.append({'url': video_thumbnail})
2254
2255 # upload date
2256 upload_date = self._html_search_meta(
2257 'datePublished', video_webpage, 'upload date', default=None)
2258 if not upload_date:
2259 upload_date = self._search_regex(
2260 [r'(?s)id="eow-date.*?>(.*?)</span>',
2261 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2262 video_webpage, 'upload date', default=None)
2263 if not upload_date:
2264 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2265 upload_date = unified_strdate(upload_date)
2266
2267 video_license = self._html_search_regex(
2268 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2269 video_webpage, 'license', default=None)
2270
2271 m_music = re.search(
2272 r'''(?x)
2273 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2274 <ul[^>]*>\s*
2275 <li>(?P<title>.+?)
2276 by (?P<creator>.+?)
2277 (?:
2278 \(.+?\)|
2279 <a[^>]*
2280 (?:
2281 \bhref=["\']/red[^>]*>| # drop possible
2282 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2283 )
2284 .*?
2285 )?</li
2286 ''',
2287 video_webpage)
2288 if m_music:
2289 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2290 video_creator = clean_html(m_music.group('creator'))
2291 else:
2292 video_alt_title = video_creator = None
2293
2294 def extract_meta(field):
2295 return self._html_search_regex(
2296 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2297 video_webpage, field, default=None)
2298
2299 track = extract_meta('Song')
2300 artist = extract_meta('Artist')
2301 album = extract_meta('Album')
2302
2303 # Youtube Music Auto-generated description
2304 release_date = release_year = None
2305 if video_description:
2306 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2307 if mobj:
2308 if not track:
2309 track = mobj.group('track').strip()
2310 if not artist:
2311 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2312 if not album:
2313 album = mobj.group('album'.strip())
2314 release_year = mobj.group('release_year')
2315 release_date = mobj.group('release_date')
2316 if release_date:
2317 release_date = release_date.replace('-', '')
2318 if not release_year:
2319 release_year = int(release_date[:4])
2320 if release_year:
2321 release_year = int(release_year)
2322
2323 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2324 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2325 for content in contents:
2326 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2327 multiple_songs = False
2328 for row in rows:
2329 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2330 multiple_songs = True
2331 break
2332 for row in rows:
2333 mrr = row.get('metadataRowRenderer') or {}
2334 mrr_title = try_get(
2335 mrr, lambda x: x['title']['simpleText'], compat_str)
2336 mrr_contents = try_get(
2337 mrr, lambda x: x['contents'][0], dict) or {}
2338 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2339 if not (mrr_title and mrr_contents_text):
2340 continue
2341 if mrr_title == 'License':
2342 video_license = mrr_contents_text
2343 elif not multiple_songs:
2344 if mrr_title == 'Album':
2345 album = mrr_contents_text
2346 elif mrr_title == 'Artist':
2347 artist = mrr_contents_text
2348 elif mrr_title == 'Song':
2349 track = mrr_contents_text
2350
2351 m_episode = re.search(
2352 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2353 video_webpage)
2354 if m_episode:
2355 series = unescapeHTML(m_episode.group('series'))
2356 season_number = int(m_episode.group('season'))
2357 episode_number = int(m_episode.group('episode'))
2358 else:
2359 series = season_number = episode_number = None
2360
2361 m_cat_container = self._search_regex(
2362 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2363 video_webpage, 'categories', default=None)
2364 category = None
2365 if m_cat_container:
2366 category = self._html_search_regex(
2367 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2368 default=None)
2369 if not category:
2370 category = try_get(
2371 microformat, lambda x: x['category'], compat_str)
2372 video_categories = None if category is None else [category]
2373
2374 video_tags = [
2375 unescapeHTML(m.group('content'))
2376 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2377 if not video_tags:
2378 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2379
2380 def _extract_count(count_name):
2381 return str_to_int(self._search_regex(
2382 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2383 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2384 video_webpage, count_name, default=None))
2385
2386 like_count = _extract_count('like')
2387 dislike_count = _extract_count('dislike')
2388
2389 if view_count is None:
2390 view_count = str_to_int(self._search_regex(
2391 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2392 'view count', default=None))
2393
2394 average_rating = (
2395 float_or_none(video_details.get('averageRating'))
2396 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2397
2398 # subtitles
2399 video_subtitles = self.extract_subtitles(
2400 video_id, video_webpage, has_live_chat_replay)
2401 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2402
2403 video_duration = try_get(
2404 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2405 if not video_duration:
2406 video_duration = int_or_none(video_details.get('lengthSeconds'))
2407 if not video_duration:
2408 video_duration = parse_duration(self._html_search_meta(
2409 'duration', video_webpage, 'video duration'))
2410
2411 # Get Subscriber Count of channel
2412 subscriber_count = parse_count(self._search_regex(
2413 r'"text":"([\d\.]+\w?) subscribers"',
2414 video_webpage,
2415 'subscriber count',
2416 default=None
2417 ))
2418
2419 # get xsrf for annotations or comments
2420 get_annotations = self._downloader.params.get('writeannotations', False)
2421 get_comments = self._downloader.params.get('getcomments', False)
2422 if get_annotations or get_comments:
2423 xsrf_token = None
2424 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2425 if ytcfg:
2426 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2427 if not xsrf_token:
2428 xsrf_token = self._search_regex(
2429 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2430 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2431
2432 # annotations
2433 video_annotations = None
2434 if get_annotations:
2435 invideo_url = try_get(
2436 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2437 if xsrf_token and invideo_url:
2438 xsrf_field_name = None
2439 if ytcfg:
2440 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2441 if not xsrf_field_name:
2442 xsrf_field_name = self._search_regex(
2443 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2444 video_webpage, 'xsrf field name',
2445 group='xsrf_field_name', default='session_token')
2446 video_annotations = self._download_webpage(
2447 self._proto_relative_url(invideo_url),
2448 video_id, note='Downloading annotations',
2449 errnote='Unable to download video annotations', fatal=False,
2450 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2451
2452 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2453
2454 # Get comments
2455 # TODO: Refactor and move to seperate function
2456 if get_comments:
2457 expected_video_comment_count = 0
2458 video_comments = []
2459
2460 def find_value(html, key, num_chars=2, separator='"'):
2461 pos_begin = html.find(key) + len(key) + num_chars
2462 pos_end = html.find(separator, pos_begin)
2463 return html[pos_begin: pos_end]
2464
2465 def search_dict(partial, key):
2466 if isinstance(partial, dict):
2467 for k, v in partial.items():
2468 if k == key:
2469 yield v
2470 else:
2471 for o in search_dict(v, key):
2472 yield o
2473 elif isinstance(partial, list):
2474 for i in partial:
2475 for o in search_dict(i, key):
2476 yield o
2477
2478 try:
2479 ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
2480 continuations = [ncd['continuation']]
2481 # Handle videos where comments have been disabled entirely
2482 except StopIteration:
2483 continuations = []
2484
2485 def get_continuation(continuation, session_token, replies=False):
2486 query = {
2487 'pbj': 1,
2488 'ctoken': continuation,
2489 }
2490 if replies:
2491 query['action_get_comment_replies'] = 1
2492 else:
2493 query['action_get_comments'] = 1
2494
2495 while True:
2496 content, handle = self._download_webpage_handle(
2497 'https://www.youtube.com/comment_service_ajax',
2498 video_id,
2499 note=False,
2500 expected_status=[413],
2501 data=urlencode_postdata({
2502 'session_token': session_token
2503 }),
2504 query=query,
2505 headers={
2506 'Accept': '*/*',
2507 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
2508 'X-YouTube-Client-Name': '1',
2509 'X-YouTube-Client-Version': '2.20201202.06.01'
2510 }
2511 )
2512
2513 response_code = handle.getcode()
2514 if (response_code == 200):
2515 return self._parse_json(content, video_id)
2516 if (response_code == 413):
2517 return None
2518 raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2519
2520 first_continuation = True
2521 while continuations:
2522 continuation, itct = continuations.pop()
2523 comment_response = get_continuation(continuation, xsrf_token)
2524 if not comment_response:
2525 continue
2526 if list(search_dict(comment_response, 'externalErrorMessage')):
2527 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2528
2529 if 'continuationContents' not in comment_response['response']:
2530 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2531 continue
2532 # not sure if this actually helps
2533 if 'xsrf_token' in comment_response:
2534 xsrf_token = comment_response['xsrf_token']
2535
2536 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2537 if first_continuation:
2538 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2539 first_continuation = False
2540 if 'contents' not in item_section:
2541 # continuation returned no comments?
2542 # set an empty array as to not break the for loop
2543 item_section['contents'] = []
2544
2545 for meta_comment in item_section['contents']:
2546 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2547 video_comments.append({
2548 'id': comment['commentId'],
2549 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
2550 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
2551 'author': comment.get('authorText', {}).get('simpleText', ''),
2552 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2553 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2554 'parent': 'root'
2555 })
2556 if 'replies' not in meta_comment['commentThreadRenderer']:
2557 continue
2558
2559 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2560 while reply_continuations:
2561 time.sleep(1)
2562 continuation = reply_continuations.pop()
2563 replies_data = get_continuation(continuation, xsrf_token, True)
2564 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
2565 continue
2566
2567 if self._downloader.params.get('verbose', False):
2568 self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
2569 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
2570 for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']:
2571 reply_comment = reply_meta['commentRenderer']
2572 video_comments.append({
2573 'id': reply_comment['commentId'],
2574 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
2575 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
2576 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2577 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2578 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2579 'parent': comment['commentId']
2580 })
2581 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
2582 continue
2583
2584 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
2585
2586 self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2587
2588 if 'continuations' in item_section:
2589 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
2590 time.sleep(1)
2591
2592 self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2593 else:
2594 expected_video_comment_count = None
2595 video_comments = None
2596
2597 # Look for the DASH manifest
2598 if self._downloader.params.get('youtube_include_dash_manifest', True):
2599 dash_mpd_fatal = True
2600 for mpd_url in dash_mpds:
2601 dash_formats = {}
2602 try:
2603 def decrypt_sig(mobj):
2604 s = mobj.group(1)
2605 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2606 return '/signature/%s' % dec_s
2607
2608 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2609
2610 for df in self._extract_mpd_formats(
2611 mpd_url, video_id, fatal=dash_mpd_fatal,
2612 formats_dict=self._formats):
2613 if not df.get('filesize'):
2614 df['filesize'] = _extract_filesize(df['url'])
2615 # Do not overwrite DASH format found in some previous DASH manifest
2616 if df['format_id'] not in dash_formats:
2617 dash_formats[df['format_id']] = df
2618 # Additional DASH manifests may end up in HTTP Error 403 therefore
2619 # allow them to fail without bug report message if we already have
2620 # some DASH manifest succeeded. This is temporary workaround to reduce
2621 # burst of bug reports until we figure out the reason and whether it
2622 # can be fixed at all.
2623 dash_mpd_fatal = False
2624 except (ExtractorError, KeyError) as e:
2625 self.report_warning(
2626 'Skipping DASH manifest: %r' % e, video_id)
2627 if dash_formats:
2628 # Remove the formats we found through non-DASH, they
2629 # contain less info and it can be wrong, because we use
2630 # fixed values (for example the resolution). See
2631 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2632 # example.
2633 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2634 formats.extend(dash_formats.values())
2635
2636 # Check for malformed aspect ratio
2637 stretched_m = re.search(
2638 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2639 video_webpage)
2640 if stretched_m:
2641 w = float(stretched_m.group('w'))
2642 h = float(stretched_m.group('h'))
2643 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2644 # We will only process correct ratios.
2645 if w > 0 and h > 0:
2646 ratio = w / h
2647 for f in formats:
2648 if f.get('vcodec') != 'none':
2649 f['stretched_ratio'] = ratio
2650
2651 if not formats:
2652 if 'reason' in video_info:
2653 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2654 regions_allowed = self._html_search_meta(
2655 'regionsAllowed', video_webpage, default=None)
2656 countries = regions_allowed.split(',') if regions_allowed else None
2657 self.raise_geo_restricted(
2658 msg=video_info['reason'][0], countries=countries)
2659 reason = video_info['reason'][0]
2660 if 'Invalid parameters' in reason:
2661 unavailable_message = extract_unavailable_message()
2662 if unavailable_message:
2663 reason = unavailable_message
2664 raise ExtractorError(
2665 'YouTube said: %s' % reason,
2666 expected=True, video_id=video_id)
2667 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2668 raise ExtractorError('This video is DRM protected.', expected=True)
2669
2670 self._sort_formats(formats)
2671
2672 self.mark_watched(video_id, video_info, player_response)
2673
2674 return {
2675 'id': video_id,
2676 'uploader': video_uploader,
2677 'uploader_id': video_uploader_id,
2678 'uploader_url': video_uploader_url,
2679 'channel': video_uploader,
2680 'channel_id': channel_id,
2681 'channel_url': channel_url,
2682 'upload_date': upload_date,
2683 'license': video_license,
2684 'creator': video_creator or artist,
2685 'title': video_title,
2686 'alt_title': video_alt_title or track,
2687 'thumbnails': thumbnails,
2688 'description': video_description,
2689 'categories': video_categories,
2690 'tags': video_tags,
2691 'subtitles': video_subtitles,
2692 'automatic_captions': automatic_captions,
2693 'duration': video_duration,
2694 'age_limit': 18 if age_gate else 0,
2695 'annotations': video_annotations,
2696 'chapters': chapters,
2697 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2698 'view_count': view_count,
2699 'like_count': like_count,
2700 'dislike_count': dislike_count,
2701 'average_rating': average_rating,
2702 'formats': formats,
2703 'is_live': is_live,
2704 'start_time': start_time,
2705 'end_time': end_time,
2706 'series': series,
2707 'season_number': season_number,
2708 'episode_number': episode_number,
2709 'track': track,
2710 'artist': artist,
2711 'album': album,
2712 'release_date': release_date,
2713 'release_year': release_year,
2714 'subscriber_count': subscriber_count,
2715 'playable_in_embed': playable_in_embed,
2716 'comments': video_comments,
2717 'comment_count': expected_video_comment_count,
2718 }
2719
2720
2721 class YoutubeTabIE(YoutubeBaseInfoExtractor):
2722 IE_DESC = 'YouTube.com tab'
2723 _VALID_URL = r'''(?x)
2724 https?://
2725 (?:\w+\.)?
2726 (?:
2727 youtube(?:kids)?\.com|
2728 invidio\.us
2729 )/
2730 (?:
2731 (?:channel|c|user)/|
2732 (?P<not_channel>
2733 feed/|
2734 (?:playlist|watch)\?.*?\blist=
2735 )|
2736 (?!(?:%s)\b) # Direct URLs
2737 )
2738 (?P<id>[^/?\#&]+)
2739 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
2740 IE_NAME = 'youtube:tab'
2741
2742 _TESTS = [{
2743 # playlists, multipage
2744 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2745 'playlist_mincount': 94,
2746 'info_dict': {
2747 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2748 'title': 'Игорь Клейнер - Playlists',
2749 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2750 },
2751 }, {
2752 # playlists, multipage, different order
2753 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2754 'playlist_mincount': 94,
2755 'info_dict': {
2756 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2757 'title': 'Игорь Клейнер - Playlists',
2758 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2759 },
2760 }, {
2761 # playlists, singlepage
2762 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2763 'playlist_mincount': 4,
2764 'info_dict': {
2765 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2766 'title': 'ThirstForScience - Playlists',
2767 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2768 }
2769 }, {
2770 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2771 'only_matching': True,
2772 }, {
2773 # basic, single video playlist
2774 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2775 'info_dict': {
2776 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2777 'uploader': 'Sergey M.',
2778 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2779 'title': 'youtube-dl public playlist',
2780 },
2781 'playlist_count': 1,
2782 }, {
2783 # empty playlist
2784 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2785 'info_dict': {
2786 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2787 'uploader': 'Sergey M.',
2788 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2789 'title': 'youtube-dl empty playlist',
2790 },
2791 'playlist_count': 0,
2792 }, {
2793 # Home tab
2794 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2795 'info_dict': {
2796 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2797 'title': 'lex will - Home',
2798 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2799 },
2800 'playlist_mincount': 2,
2801 }, {
2802 # Videos tab
2803 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2804 'info_dict': {
2805 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2806 'title': 'lex will - Videos',
2807 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2808 },
2809 'playlist_mincount': 975,
2810 }, {
2811 # Videos tab, sorted by popular
2812 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2813 'info_dict': {
2814 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2815 'title': 'lex will - Videos',
2816 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2817 },
2818 'playlist_mincount': 199,
2819 }, {
2820 # Playlists tab
2821 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2822 'info_dict': {
2823 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2824 'title': 'lex will - Playlists',
2825 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2826 },
2827 'playlist_mincount': 17,
2828 }, {
2829 # Community tab
2830 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2831 'info_dict': {
2832 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2833 'title': 'lex will - Community',
2834 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2835 },
2836 'playlist_mincount': 18,
2837 }, {
2838 # Channels tab
2839 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2840 'info_dict': {
2841 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2842 'title': 'lex will - Channels',
2843 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2844 },
2845 'playlist_mincount': 138,
2846 }, {
2847 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2848 'only_matching': True,
2849 }, {
2850 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2851 'only_matching': True,
2852 }, {
2853 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2854 'only_matching': True,
2855 }, {
2856 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2857 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2858 'info_dict': {
2859 'title': '29C3: Not my department',
2860 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2861 'uploader': 'Christiaan008',
2862 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2863 },
2864 'playlist_count': 96,
2865 }, {
2866 'note': 'Large playlist',
2867 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2868 'info_dict': {
2869 'title': 'Uploads from Cauchemar',
2870 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2871 'uploader': 'Cauchemar',
2872 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2873 },
2874 'playlist_mincount': 1123,
2875 }, {
2876 # even larger playlist, 8832 videos
2877 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2878 'only_matching': True,
2879 }, {
2880 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2881 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2882 'info_dict': {
2883 'title': 'Uploads from Interstellar Movie',
2884 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2885 'uploader': 'Interstellar Movie',
2886 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2887 },
2888 'playlist_mincount': 21,
2889 }, {
2890 # https://github.com/ytdl-org/youtube-dl/issues/21844
2891 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2892 'info_dict': {
2893 'title': 'Data Analysis with Dr Mike Pound',
2894 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2895 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2896 'uploader': 'Computerphile',
2897 },
2898 'playlist_mincount': 11,
2899 }, {
2900 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2901 'only_matching': True,
2902 }, {
2903 # Playlist URL that does not actually serve a playlist
2904 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2905 'info_dict': {
2906 'id': 'FqZTN594JQw',
2907 'ext': 'webm',
2908 'title': "Smiley's People 01 detective, Adventure Series, Action",
2909 'uploader': 'STREEM',
2910 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2912 'upload_date': '20150526',
2913 'license': 'Standard YouTube License',
2914 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2915 'categories': ['People & Blogs'],
2916 'tags': list,
2917 'view_count': int,
2918 'like_count': int,
2919 'dislike_count': int,
2920 },
2921 'params': {
2922 'skip_download': True,
2923 },
2924 'skip': 'This video is not available.',
2925 'add_ie': [YoutubeIE.ie_key()],
2926 }, {
2927 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2928 'only_matching': True,
2929 }, {
2930 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2931 'only_matching': True,
2932 }, {
2933 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2934 'info_dict': {
2935 'id': '9Auq9mYxFEE',
2936 'ext': 'mp4',
2937 'title': 'Watch Sky News live',
2938 'uploader': 'Sky News',
2939 'uploader_id': 'skynews',
2940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2941 'upload_date': '20191102',
2942 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2943 'categories': ['News & Politics'],
2944 'tags': list,
2945 'like_count': int,
2946 'dislike_count': int,
2947 },
2948 'params': {
2949 'skip_download': True,
2950 },
2951 }, {
2952 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2953 'info_dict': {
2954 'id': 'a48o2S1cPoo',
2955 'ext': 'mp4',
2956 'title': 'The Young Turks - Live Main Show',
2957 'uploader': 'The Young Turks',
2958 'uploader_id': 'TheYoungTurks',
2959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2960 'upload_date': '20150715',
2961 'license': 'Standard YouTube License',
2962 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2963 'categories': ['News & Politics'],
2964 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2965 'like_count': int,
2966 'dislike_count': int,
2967 },
2968 'params': {
2969 'skip_download': True,
2970 },
2971 'only_matching': True,
2972 }, {
2973 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2974 'only_matching': True,
2975 }, {
2976 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2977 'only_matching': True,
2978 }, {
2979 'url': 'https://www.youtube.com/feed/trending',
2980 'only_matching': True,
2981 }, {
2982 # needs auth
2983 'url': 'https://www.youtube.com/feed/library',
2984 'only_matching': True,
2985 }, {
2986 # needs auth
2987 'url': 'https://www.youtube.com/feed/history',
2988 'only_matching': True,
2989 }, {
2990 # needs auth
2991 'url': 'https://www.youtube.com/feed/subscriptions',
2992 'only_matching': True,
2993 }, {
2994 # needs auth
2995 'url': 'https://www.youtube.com/feed/watch_later',
2996 'only_matching': True,
2997 }, {
2998 # no longer available?
2999 'url': 'https://www.youtube.com/feed/recommended',
3000 'only_matching': True,
3001 }, {
3002 # inline playlist with not always working continuations
3003 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3004 'only_matching': True,
3005 }, {
3006 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3007 'only_matching': True,
3008 }, {
3009 'url': 'https://www.youtube.com/course',
3010 'only_matching': True,
3011 }, {
3012 'url': 'https://www.youtube.com/zsecurity',
3013 'only_matching': True,
3014 }, {
3015 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3016 'only_matching': True,
3017 }, {
3018 'url': 'https://www.youtube.com/TheYoungTurks/live',
3019 'only_matching': True,
3020 }]
3021
3022 @classmethod
3023 def suitable(cls, url):
3024 return False if YoutubeIE.suitable(url) else super(
3025 YoutubeTabIE, cls).suitable(url)
3026
3027 def _extract_channel_id(self, webpage):
3028 channel_id = self._html_search_meta(
3029 'channelId', webpage, 'channel id', default=None)
3030 if channel_id:
3031 return channel_id
3032 channel_url = self._html_search_meta(
3033 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3034 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3035 'twitter:app:url:googleplay'), webpage, 'channel url')
3036 return self._search_regex(
3037 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3038 channel_url, 'channel id')
3039
3040 @staticmethod
3041 def _extract_grid_item_renderer(item):
3042 for item_kind in ('Playlist', 'Video', 'Channel'):
3043 renderer = item.get('grid%sRenderer' % item_kind)
3044 if renderer:
3045 return renderer
3046
3047 def _grid_entries(self, grid_renderer):
3048 for item in grid_renderer['items']:
3049 if not isinstance(item, dict):
3050 continue
3051 renderer = self._extract_grid_item_renderer(item)
3052 if not isinstance(renderer, dict):
3053 continue
3054 title = try_get(
3055 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3056 # playlist
3057 playlist_id = renderer.get('playlistId')
3058 if playlist_id:
3059 yield self.url_result(
3060 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3061 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3062 video_title=title)
3063 # video
3064 video_id = renderer.get('videoId')
3065 if video_id:
3066 yield self._extract_video(renderer)
3067 # channel
3068 channel_id = renderer.get('channelId')
3069 if channel_id:
3070 title = try_get(
3071 renderer, lambda x: x['title']['simpleText'], compat_str)
3072 yield self.url_result(
3073 'https://www.youtube.com/channel/%s' % channel_id,
3074 ie=YoutubeTabIE.ie_key(), video_title=title)
3075
3076 def _shelf_entries_from_content(self, shelf_renderer):
3077 content = shelf_renderer.get('content')
3078 if not isinstance(content, dict):
3079 return
3080 renderer = content.get('gridRenderer')
3081 if renderer:
3082 # TODO: add support for nested playlists so each shelf is processed
3083 # as separate playlist
3084 # TODO: this includes only first N items
3085 for entry in self._grid_entries(renderer):
3086 yield entry
3087 renderer = content.get('horizontalListRenderer')
3088 if renderer:
3089 # TODO
3090 pass
3091
3092 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3093 ep = try_get(
3094 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3095 compat_str)
3096 shelf_url = urljoin('https://www.youtube.com', ep)
3097 if shelf_url:
3098 # Skipping links to another channels, note that checking for
3099 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3100 # will not work
3101 if skip_channels and '/channels?' in shelf_url:
3102 return
3103 title = try_get(
3104 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3105 yield self.url_result(shelf_url, video_title=title)
3106 # Shelf may not contain shelf URL, fallback to extraction from content
3107 for entry in self._shelf_entries_from_content(shelf_renderer):
3108 yield entry
3109
3110 def _playlist_entries(self, video_list_renderer):
3111 for content in video_list_renderer['contents']:
3112 if not isinstance(content, dict):
3113 continue
3114 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3115 if not isinstance(renderer, dict):
3116 continue
3117 video_id = renderer.get('videoId')
3118 if not video_id:
3119 continue
3120 yield self._extract_video(renderer)
3121
3122 r""" # Not needed in the new implementation
3123 def _itemSection_entries(self, item_sect_renderer):
3124 for content in item_sect_renderer['contents']:
3125 if not isinstance(content, dict):
3126 continue
3127 renderer = content.get('videoRenderer', {})
3128 if not isinstance(renderer, dict):
3129 continue
3130 video_id = renderer.get('videoId')
3131 if not video_id:
3132 continue
3133 yield self._extract_video(renderer)
3134 """
3135
3136 def _rich_entries(self, rich_grid_renderer):
3137 renderer = try_get(
3138 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3139 video_id = renderer.get('videoId')
3140 if not video_id:
3141 return
3142 yield self._extract_video(renderer)
3143
3144 def _video_entry(self, video_renderer):
3145 video_id = video_renderer.get('videoId')
3146 if video_id:
3147 return self._extract_video(video_renderer)
3148
3149 def _post_thread_entries(self, post_thread_renderer):
3150 post_renderer = try_get(
3151 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3152 if not post_renderer:
3153 return
3154 # video attachment
3155 video_renderer = try_get(
3156 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
3157 video_id = None
3158 if video_renderer:
3159 entry = self._video_entry(video_renderer)
3160 if entry:
3161 yield entry
3162 # inline video links
3163 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3164 for run in runs:
3165 if not isinstance(run, dict):
3166 continue
3167 ep_url = try_get(
3168 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3169 if not ep_url:
3170 continue
3171 if not YoutubeIE.suitable(ep_url):
3172 continue
3173 ep_video_id = YoutubeIE._match_id(ep_url)
3174 if video_id == ep_video_id:
3175 continue
3176 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
3177
3178 def _post_thread_continuation_entries(self, post_thread_continuation):
3179 contents = post_thread_continuation.get('contents')
3180 if not isinstance(contents, list):
3181 return
3182 for content in contents:
3183 renderer = content.get('backstagePostThreadRenderer')
3184 if not isinstance(renderer, dict):
3185 continue
3186 for entry in self._post_thread_entries(renderer):
3187 yield entry
3188
3189 @staticmethod
3190 def _build_continuation_query(continuation, ctp=None):
3191 query = {
3192 'ctoken': continuation,
3193 'continuation': continuation,
3194 }
3195 if ctp:
3196 query['itct'] = ctp
3197 return query
3198
3199 @staticmethod
3200 def _extract_next_continuation_data(renderer):
3201 next_continuation = try_get(
3202 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
3203 if not next_continuation:
3204 return
3205 continuation = next_continuation.get('continuation')
3206 if not continuation:
3207 return
3208 ctp = next_continuation.get('clickTrackingParams')
3209 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3210
3211 @classmethod
3212 def _extract_continuation(cls, renderer):
3213 next_continuation = cls._extract_next_continuation_data(renderer)
3214 if next_continuation:
3215 return next_continuation
3216 contents = renderer.get('contents')
3217 if not isinstance(contents, list):
3218 return
3219 for content in contents:
3220 if not isinstance(content, dict):
3221 continue
3222 continuation_ep = try_get(
3223 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3224 dict)
3225 if not continuation_ep:
3226 continue
3227 continuation = try_get(
3228 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3229 if not continuation:
3230 continue
3231 ctp = continuation_ep.get('clickTrackingParams')
3232 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3233
    def _entries(self, tab, identity_token):
        """Generate all entries for a tab, following continuations.

        First yields everything in the tab's initial renderer, then keeps
        requesting /browse_ajax continuation pages until no further
        continuation token is produced.
        """

        # Closure over continuation_list: extract_entries both yields entries
        # and records the next continuation token as a side effect.
        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Rich grid items (e.g. home feed) live outside
                    # itemSectionRenderer; their continuation comes from the
                    # parent renderer
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    # Dispatch on renderer type; the first matching kind wins
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # On the Channels tab, shelf links to other channels
                        # are the actual content, so do not skip them there
                        is_channels_tab = tab.get('title') == 'Channels'
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Old-style continuation payload: one of several *Continuation keys
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # New-style continuation payload: appendContinuationItemsAction
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): this `continue` re-requests the same
                    # continuation token — looks like a potential infinite
                    # loop; confirm the server can't return this repeatedly
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3377
3378 @staticmethod
3379 def _extract_selected_tab(tabs):
3380 for tab in tabs:
3381 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3382 return tab['tabRenderer']
3383 else:
3384 raise ExtractorError('Unable to find selected tab')
3385
3386 @staticmethod
3387 def _extract_uploader(data):
3388 uploader = {}
3389 sidebar_renderer = try_get(
3390 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3391 if sidebar_renderer:
3392 for item in sidebar_renderer:
3393 if not isinstance(item, dict):
3394 continue
3395 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3396 if not isinstance(renderer, dict):
3397 continue
3398 owner = try_get(
3399 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3400 if owner:
3401 uploader['uploader'] = owner.get('text')
3402 uploader['uploader_id'] = try_get(
3403 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3404 uploader['uploader_url'] = urljoin(
3405 'https://www.youtube.com/',
3406 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3407 return {k: v for k, v in uploader.items() if v is not None}
3408
3409 def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3410 playlist_id = title = description = channel_url = channel_name = channel_id = None
3411 thumbnails_list = tags = []
3412
3413 selected_tab = self._extract_selected_tab(tabs)
3414 renderer = try_get(
3415 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3416 if renderer:
3417 channel_name = renderer.get('title')
3418 channel_url = renderer.get('channelUrl')
3419 channel_id = renderer.get('externalId')
3420
3421 if not renderer:
3422 renderer = try_get(
3423 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3424 if renderer:
3425 title = renderer.get('title')
3426 description = renderer.get('description')
3427 playlist_id = channel_id
3428 tags = renderer.get('keywords', '').split()
3429 thumbnails_list = (
3430 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3431 or data['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails']
3432 or [])
3433
3434 thumbnails = []
3435 for t in thumbnails_list:
3436 if not isinstance(t, dict):
3437 continue
3438 thumbnail_url = url_or_none(t.get('url'))
3439 if not thumbnail_url:
3440 continue
3441 thumbnails.append({
3442 'url': thumbnail_url,
3443 'width': int_or_none(t.get('width')),
3444 'height': int_or_none(t.get('height')),
3445 })
3446
3447 if playlist_id is None:
3448 playlist_id = item_id
3449 if title is None:
3450 title = playlist_id
3451 title += format_field(selected_tab, 'title', ' - %s')
3452
3453 metadata = {
3454 'playlist_id': playlist_id,
3455 'playlist_title': title,
3456 'playlist_description': description,
3457 'uploader': channel_name,
3458 'uploader_id': channel_id,
3459 'uploader_url': channel_url,
3460 'thumbnails': thumbnails,
3461 'tags': tags,
3462 }
3463 if not channel_id:
3464 metadata.update(self._extract_uploader(data))
3465 metadata.update({
3466 'channel': metadata['uploader'],
3467 'channel_id': metadata['uploader_id'],
3468 'channel_url': metadata['uploader_url']})
3469 return self.playlist_result(
3470 self._entries(selected_tab, identity_token),
3471 **metadata)
3472
3473 def _extract_from_playlist(self, item_id, url, data, playlist):
3474 title = playlist.get('title') or try_get(
3475 data, lambda x: x['titleText']['simpleText'], compat_str)
3476 playlist_id = playlist.get('playlistId') or item_id
3477 # Inline playlist rendition continuation does not always work
3478 # at Youtube side, so delegating regular tab-based playlist URL
3479 # processing whenever possible.
3480 playlist_url = urljoin(url, try_get(
3481 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3482 compat_str))
3483 if playlist_url and playlist_url != url:
3484 return self.url_result(
3485 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3486 video_title=title)
3487 return self.playlist_result(
3488 self._playlist_entries(playlist), playlist_id=playlist_id,
3489 playlist_title=title)
3490
3491 @staticmethod
3492 def _extract_alerts(data):
3493 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3494 if not isinstance(alert_dict, dict):
3495 continue
3496 for renderer in alert_dict:
3497 alert = alert_dict[renderer]
3498 alert_type = alert.get('type')
3499 if not alert_type:
3500 continue
3501 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3502 if message:
3503 yield alert_type, message
3504 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3505 message = try_get(run, lambda x: x['text'], compat_str)
3506 if message:
3507 yield alert_type, message
3508
3509 def _extract_identity_token(self, webpage, item_id):
3510 ytcfg = self._extract_ytcfg(item_id, webpage)
3511 if ytcfg:
3512 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3513 if token:
3514 return token
3515 return self._search_regex(
3516 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3517 'identity token', default=None)
3518
    def _real_extract(self, url):
        """Dispatch a tab/channel/playlist/watch URL to the right handler.

        Normalizes the host to www.youtube.com, redirects bare channel
        pages to their /videos tab, honors --no-playlist for watch URLs
        carrying both v= and list=, surfaces YouTube alert messages, and
        finally extracts from tabs, an inline playlist, or a single video.
        """
        item_id = self._match_id(url)
        # Force the canonical host so later URL comparisons and requests
        # behave consistently.
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Matches when nothing but an optional '/' (plus query/fragment)
        # follows the matched prefix — i.e. a channel/user home page.
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # watch-style URL without a v= parameter: fall back to its playlist
        # when one is present, otherwise give up.
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Warn about every non-error alert; remember the last error message
        # (warning about any earlier one) and raise it after the loop.
        err_msg = None
        for alert_type, alert_message in self._extract_alerts(data):
            if alert_type.lower() == 'error':
                if err_msg:
                    self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
                err_msg = alert_message
            else:
                self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if err_msg:
            raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3578
3579
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for anything it can already handle.
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        # Rebuild a canonical /playlist URL (carrying over any query
        # parameters, or just list=<id> for bare playlist IDs) and hand
        # it off to the tab extractor.
        playlist_id = self._match_id(url)
        query = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', query),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3654
3655
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite the short youtu.be form into a full watch URL so the tab
        # extractor can apply its video-vs-playlist logic.
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3694
3695
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Expand the "ytuser:NAME" shorthand to a canonical user URL and
        # delegate to the tab extractor.
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3709
3710
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are served as the auto-generated "LL" playlist.
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3728
3729
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional extra search parameter string POSTed as 'params' (set by
    # subclasses, e.g. to change result ordering)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query via the innertube search API,
        following continuation tokens across pages."""
        # Request payload for the youtubei/v1/search endpoint; the
        # 'continuation' key is added below when paginating.
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page and continuation pages nest the section list under
            # different paths; try both.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Skip non-video items (ads, shelves, channels, ...)
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

                # Keep the first continuation token encountered; later
                # sections may not carry one.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3810
3811
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # 'CAI%3D' is 'CAI=' URL-encoded — presumably the innertube filter
    # selecting upload-date ordering (newest first, per IE_DESC); sent as
    # the 'params' field of the search request by the parent class.
    _SEARCH_PARAMS = 'CAI%3D'
3817
3818
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run a search for the query embedded in a /results URL."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        # Forward any 'sp' filter parameter to the search API.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3844
3845
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the authenticated per-account feed extractors.
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so authenticate up front.
        self._login()

    def _real_extract(self, url):
        # Every feed is just a tab page under /feed/<name>; delegate to
        # the tab extractor.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3866
3867
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The watch-later list is served as the auto-generated "WL" playlist.
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
3880
3881
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches a bare youtube.com homepage URL (nothing after the
    # optional '/', query or fragment only)
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Resolved by the base class to https://www.youtube.com/feed/recommended
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3896
3897
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Resolved by the base class to https://www.youtube.com/feed/subscriptions
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3909
3910
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    # Resolved by the base class to https://www.youtube.com/feed/history
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3919
3920
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Such URLs are watch links whose v= parameter was lost (typically
        # to an unquoted '&' in the shell), so fail with actionable advice.
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
3968
3969
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A video ID shorter than 11 characters can never be valid; report
        # the truncation instead of attempting extraction.
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
3985
3986
# Do Youtube show urls even exist anymore? I couldn't find any.
# NOTE: extractor intentionally disabled — the class below is kept inside a
# raw-string literal (never executed) for reference, in case show URLs
# resurface.
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''