youtube_dlc/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     bool_or_none,
  31     clean_html,
  32     error_to_compat_str,
  33     extract_attributes,
  34     ExtractorError,
  35     float_or_none,
  36     get_element_by_attribute,
  37     get_element_by_id,
  38     int_or_none,
  39     mimetype2ext,
  40     orderedSet,
  41     parse_codecs,
  42     parse_duration,
  43     remove_quotes,
  44     remove_start,
  45     smuggle_url,
  46     str_or_none,
  47     str_to_int,
  48     try_get,
  49     unescapeHTML,
  50     unified_strdate,
  51     unsmuggle_url,
  52     uppercase_escape,
  53     url_or_none,
  54     urlencode_postdata,
  55 )
  56
  57
  58 class YoutubeBaseInfoExtractor(InfoExtractor):
  59     """Provide base functions for Youtube extractors"""
  60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  62
  63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  66
  67     _NETRC_MACHINE = 'youtube'
  68     # If True it will raise an error if no login info is provided
  69     _LOGIN_REQUIRED = False
  70
  71     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  72
  73     _YOUTUBE_CLIENT_HEADERS = {
  74         'x-youtube-client-name': '1',
  75         'x-youtube-client-version': '1.20200609.04.02',
  76     }
  77
  78     def _set_language(self):
  79         self._set_cookie(
  80             '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
  81             # YouTube sets the expire time to about two months
  82             expire_time=time.time() + 2 * 30 * 24 * 3600)
  83
  84     def _ids_to_results(self, ids):
  85         return [
  86             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  87             for vid_id in ids]
  88
  89     def _login(self):
  90         """
  91         Attempt to log in to YouTube.
  92         True is returned if successful or skipped.
  93         False is returned if login failed.
  94
  95         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  96         """
  97         username, password = self._get_login_info()
  98         # No authentication to be performed
  99         if username is None:
 100             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
 101                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 102             return True
 103
 104         login_page = self._download_webpage(
 105             self._LOGIN_URL, None,
 106             note='Downloading login page',
 107             errnote='unable to fetch login page', fatal=False)
 108         if login_page is False:
 109             return
 110
 111         login_form = self._hidden_inputs(login_page)
 112
 113         def req(url, f_req, note, errnote):
 114             data = login_form.copy()
 115             data.update({
 116                 'pstMsg': 1,
 117                 'checkConnection': 'youtube',
 118                 'checkedDomains': 'youtube',
 119                 'hl': 'en',
 120                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 121                 'f.req': json.dumps(f_req),
 122                 'flowName': 'GlifWebSignIn',
 123                 'flowEntry': 'ServiceLogin',
 124                 # TODO: reverse actual botguard identifier generation algo
 125                 'bgRequest': '["identifier",""]',
 126             })
 127             return self._download_json(
 128                 url, None, note=note, errnote=errnote,
 129                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 130                 fatal=False,
 131                 data=urlencode_postdata(data), headers={
 132                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 133                     'Google-Accounts-XSRF': 1,
 134                 })
 135
 136         def warn(message):
 137             self._downloader.report_warning(message)
 138
 139         lookup_req = [
 140             username,
 141             None, [], None, 'US', None, None, 2, False, True,
 142             [
 143                 None, None,
 144                 [2, 1, None, 1,
 145                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 146                  None, [], 4],
 147                 1, [None, None, []], None, None, None, True
 148             ],
 149             username,
 150         ]
 151
 152         lookup_results = req(
 153             self._LOOKUP_URL, lookup_req,
 154             'Looking up account info', 'Unable to look up account info')
 155
 156         if lookup_results is False:
 157             return False
 158
 159         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 160         if not user_hash:
 161             warn('Unable to extract user hash')
 162             return False
 163
 164         challenge_req = [
 165             user_hash,
 166             None, 1, None, [1, None, None, None, [password, None, True]],
 167             [
 168                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 169                 1, [None, None, []], None, None, None, True
 170             ]]
 171
 172         challenge_results = req(
 173             self._CHALLENGE_URL, challenge_req,
 174             'Logging in', 'Unable to log in')
 175
 176         if challenge_results is False:
 177             return
 178
 179         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 180         if login_res:
 181             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 182             warn(
 183                 'Unable to login: %s' % 'Invalid password'
 184                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 185             return False
 186
 187         res = try_get(challenge_results, lambda x: x[0][-1], list)
 188         if not res:
 189             warn('Unable to extract result entry')
 190             return False
 191
 192         login_challenge = try_get(res, lambda x: x[0][0], list)
 193         if login_challenge:
 194             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 195             if challenge_str == 'TWO_STEP_VERIFICATION':
 196                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 197                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 198                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 199                 if status == 'QUOTA_EXCEEDED':
 200                     warn('Exceeded the limit of TFA codes, try later')
 201                     return False
 202
 203                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 204                 if not tl:
 205                     warn('Unable to extract TL')
 206                     return False
 207
 208                 tfa_code = self._get_tfa_info('2-step verification code')
 209
 210                 if not tfa_code:
 211                     warn(
 212                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 213                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 214                     return False
 215
 216                 tfa_code = remove_start(tfa_code, 'G-')
 217
 218                 tfa_req = [
 219                     user_hash, None, 2, None,
 220                     [
 221                         9, None, None, None, None, None, None, None,
 222                         [None, tfa_code, True, 2]
 223                     ]]
 224
 225                 tfa_results = req(
 226                     self._TFA_URL.format(tl), tfa_req,
 227                     'Submitting TFA code', 'Unable to submit TFA code')
 228
 229                 if tfa_results is False:
 230                     return False
 231
 232                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 233                 if tfa_res:
 234                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 235                     warn(
 236                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 237                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 238                     return False
 239
 240                 check_cookie_url = try_get(
 241                     tfa_results, lambda x: x[0][-1][2], compat_str)
 242             else:
 243                 CHALLENGES = {
 244                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 245                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 246                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 247                 }
 248                 challenge = CHALLENGES.get(
 249                     challenge_str,
 250                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 251                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 252                 return False
 253         else:
 254             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 255
 256         if not check_cookie_url:
 257             warn('Unable to extract CheckCookie URL')
 258             return False
 259
 260         check_cookie_results = self._download_webpage(
 261             check_cookie_url, None, 'Checking cookie', fatal=False)
 262
 263         if check_cookie_results is False:
 264             return False
 265
 266         if 'https://myaccount.google.com/' not in check_cookie_results:
 267             warn('Unable to log in')
 268             return False
 269
 270         return True
 271
 272     def _download_webpage_handle(self, *args, **kwargs):
 273         query = kwargs.get('query', {}).copy()
 274         query['disable_polymer'] = 'true'
 275         kwargs['query'] = query
 276         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 277             *args, **compat_kwargs(kwargs))
 278
 279     def _real_initialize(self):
 280         if self._downloader is None:
 281             return
 282         self._set_language()
 283         if not self._login():
 284             return
 285
 286
 287 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 288     # Extract entries from page with "Load more" button
 289     def _entries(self, page, playlist_id):
 290         more_widget_html = content_html = page
 291         for page_num in itertools.count(1):
 292             for entry in self._process_page(content_html):
 293                 yield entry
 294
 295             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 296             if not mobj:
 297                 break
 298
 299             count = 0
 300             retries = 3
 301             while count <= retries:
 302                 try:
 303                     # Downloading page may result in intermittent 5xx HTTP error
 304                     # that is usually worked around with a retry
 305                     more = self._download_json(
 306                         'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
 307                         'Downloading page #%s%s'
 308                         % (page_num, ' (retry #%d)' % count if count else ''),
 309                         transform_source=uppercase_escape,
 310                         headers=self._YOUTUBE_CLIENT_HEADERS)
 311                     break
 312                 except ExtractorError as e:
 313                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
 314                         count += 1
 315                         if count <= retries:
 316                             continue
 317                     raise
 318
 319             content_html = more['content_html']
 320             if not content_html.strip():
 321                 # Some webpages show a "Load more" button but they don't
 322                 # have more videos
 323                 break
 324             more_widget_html = more['load_more_widget_html']
 325
 326
 327 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 328     def _process_page(self, content):
 329         for video_id, video_title in self.extract_videos_from_page(content):
 330             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 331
 332     def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
 333         for mobj in re.finditer(video_re, page):
 334             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 335             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 336                 continue
 337             video_id = mobj.group('id')
 338             video_title = unescapeHTML(
 339                 mobj.group('title')) if 'title' in mobj.groupdict() else None
 340             if video_title:
 341                 video_title = video_title.strip()
 342             if video_title == '► Play all':
 343                 video_title = None
 344             try:
 345                 idx = ids_in_page.index(video_id)
 346                 if video_title and not titles_in_page[idx]:
 347                     titles_in_page[idx] = video_title
 348             except ValueError:
 349                 ids_in_page.append(video_id)
 350                 titles_in_page.append(video_title)
 351
 352     def extract_videos_from_page(self, page):
 353         ids_in_page = []
 354         titles_in_page = []
 355         self.extract_videos_from_page_impl(
 356             self._VIDEO_RE, page, ids_in_page, titles_in_page)
 357         return zip(ids_in_page, titles_in_page)
 358
 359
 360 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 361     def _process_page(self, content):
 362         for playlist_id in orderedSet(re.findall(
 363                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 364                 content)):
 365             yield self.url_result(
 366                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 367
 368     def _real_extract(self, url):
 369         playlist_id = self._match_id(url)
 370         webpage = self._download_webpage(url, playlist_id)
 371         title = self._og_search_title(webpage, fatal=False)
 372         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 373
 374
 375 class YoutubeIE(YoutubeBaseInfoExtractor):
 376     IE_DESC = 'YouTube.com'
 377     _VALID_URL = r"""(?x)^
 378                      (
 379                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 380                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
 381                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 382                             (?:www\.)?pwnyoutube\.com/|
 383                             (?:www\.)?hooktube\.com/|
 384                             (?:www\.)?yourepeat\.com/|
 385                             tube\.majestyc\.net/|
 386                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
 387                             (?:(?:www|dev)\.)?invidio\.us/|
 388                             (?:(?:www|no)\.)?invidiou\.sh/|
 389                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
 390                             (?:www\.)?invidious\.kabi\.tk/|
 391                             (?:www\.)?invidious\.13ad\.de/|
 392                             (?:www\.)?invidious\.mastodon\.host/|
 393                             (?:www\.)?invidious\.nixnet\.xyz/|
 394                             (?:www\.)?invidious\.drycat\.fr/|
 395                             (?:www\.)?tube\.poal\.co/|
 396                             (?:www\.)?vid\.wxzm\.sx/|
 397                             (?:www\.)?yewtu\.be/|
 398                             (?:www\.)?yt\.elukerio\.org/|
 399                             (?:www\.)?yt\.lelux\.fi/|
 400                             (?:www\.)?invidious\.ggc-project\.de/|
 401                             (?:www\.)?yt\.maisputain\.ovh/|
 402                             (?:www\.)?invidious\.13ad\.de/|
 403                             (?:www\.)?invidious\.toot\.koeln/|
 404                             (?:www\.)?invidious\.fdn\.fr/|
 405                             (?:www\.)?watch\.nettohikari\.com/|
 406                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
 407                             (?:www\.)?qklhadlycap4cnod\.onion/|
 408                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
 409                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
 410                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
 411                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
 412                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
 413                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
 414                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 415                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 416                          (?:                                                  # the various things that can precede the ID:
 417                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 418                              |(?:                                             # or the v= param in all its forms
 419                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 420                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 421                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 422                                  v=
 423                              )
 424                          ))
 425                          |(?:
 426                             youtu\.be|                                        # just youtu.be/xxxx
 427                             vid\.plus|                                        # or vid.plus/xxxx
 428                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 429                          )/
 430                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 431                          )
 432                      )?                                                       # all until now is optional -> you can pass the naked ID
 433                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 434                      (?!.*?\blist=
 435                         (?:
 436                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 437                             WL                                                # WL are handled by the watch later IE
 438                         )
 439                      )
 440                      (?(1).+)?                                                # if we found the ID, everything can follow
 441                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 442     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 443     _PLAYER_INFO_RE = (
 444         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 445         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 446     )
 447     _formats = {
 448         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 449         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 450         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 451         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 452         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 453         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 454         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 455         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 456         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 457         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 458         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 459         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 460         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 461         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 462         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 463         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 464         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 465         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 466
 467
 468         # 3D videos
 469         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 470         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 471         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 472         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 473         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 474         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 475         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 476
 477         # Apple HTTP Live Streaming
 478         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 479         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 480         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 481         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 482         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 483         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 484         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 485         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 486
 487         # DASH mp4 video
 488         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 489         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 490         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 491         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 492         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 493         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 494         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 495         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 496         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 497         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 498         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 499         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 500
 501         # Dash mp4 audio
 502         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 503         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 504         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 505         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 506         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 507         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 508         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 509
 510         # Dash webm
 511         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 512         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 513         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 514         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 515         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 516         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 517         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 518         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 519         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 520         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 521         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 522         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 523         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 524         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 525         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 526         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 527         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 528         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 529         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 530         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 531         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 532         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 533
 534         # Dash webm audio
 535         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 536         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 537
 538         # Dash webm audio with opus inside
 539         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 540         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 541         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 542
 543         # RTMP (unnamed)
 544         '_rtmp': {'protocol': 'rtmp'},
 545
 546         # av01 video only formats sometimes served with "unknown" codecs
 547         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 548         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 549         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 550         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 551     }
 552     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt', 'json3')
 553
 554     _GEO_BYPASS = False
 555
 556     IE_NAME = 'youtube'
    # Test fixtures for this extractor. Each entry is a dict with a 'url' and,
    # optionally, an 'info_dict' of expected metadata plus keys such as
    # 'note', 'params', 'skip', 'expected_warnings', 'md5', 'playlist',
    # 'playlist_count' and 'only_matching'.
    # NOTE(review): how each key is interpreted is decided by the test
    # harness, which is not part of this file -- confirm there.
    _TESTS = [
        {
            # URL carries t=1s and end=9; start_time 1 and end_time 9 are expected
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
                'start_time': 1,
                'end_time': 9,
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'id': 'UxxajLWwzqY',
                'ext': 'mp4',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'alt_title': 'I Love It (feat. Charli XCX)',
                'description': 'md5:19a2f98d9032b9311e686ed039564f63',
                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                         'iconic ep', 'iconic', 'love', 'it'],
                'duration': 180,
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
                'creator': 'Icona Pop',
                'track': 'I Love It (feat. Charli XCX)',
                'artist': 'Icona Pop',
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
                'alt_title': 'Tunnel Vision',
                'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
                'duration': 419,
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
                'creator': 'Justin Timberlake',
                'track': 'Tunnel Vision',
                'artist': 'Justin Timberlake',
                'age_limit': 18,
            }
        },
        {
            # Protocol-relative URL with a mixed-case host name
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
                'age_limit': 18,
            }
        },
        {
            # URL contains two v= parameters; the expected id is the first one
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
            'skip': 'format 141 not served anymore',
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                'duration': 244,
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # JS player signature function name containing $
        {
            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
            'info_dict': {
                'id': 'nfWlot6h_JM',
                'ext': 'm4a',
                'title': 'Taylor Swift - Shake It Off',
                'description': 'md5:307195cd21ff7fa352270fe884570ef0',
                'duration': 242,
                'uploader': 'TaylorSwiftVEVO',
                'uploader_id': 'TaylorSwiftVEVO',
                'upload_date': '20140818',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # Controversy video
        {
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
            'info_dict': {
                'id': 'T4XJQO3qol8',
                'ext': 'mp4',
                'duration': 219,
                'upload_date': '20100909',
                'uploader': 'Amazing Atheist',
                'uploader_id': 'TheAmazingAtheist',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
        },
        # Normal age-gate video (No vevo, embed allowed)
        {
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'duration': 142,
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                'upload_date': '20140605',
                'age_limit': 18,
            },
        },
        # Age-gate video with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
            'info_dict': {
                'id': '6kLq3WMV1nU',
                'ext': 'mp4',
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                'duration': 246,
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
                'upload_date': '20110629',
                'age_limit': 18,
            },
        },
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
        # YouTube Red ad is not captured for creator
        # (the 'url' here is a bare video id rather than a full URL)
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
                'duration': 266,
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                'creator': 'Dada Life, deadmau5',
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
                'alt_title': 'This Machine Kills Some Chords',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
        },
        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'duration': 6085,
                'upload_date': '20150827',
                'uploader_id': 'olympic',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympic',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        },
        # Non-square pixels
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'id': '_b-2C3KPAM0',
                'ext': 'mp4',
                'stretched_ratio': 16 / 9.,
                'duration': 85,
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                'uploader': '孫ᄋᄅ',
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            },
        },
        # url_encoded_fmt_stream_map is empty string
        {
            'url': 'qEJwOuvDf7I',
            'info_dict': {
                'id': 'qEJwOuvDf7I',
                'ext': 'webm',
                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                'description': '',
                'upload_date': '20150404',
                'uploader_id': 'spbelect',
                'uploader': 'Наблюдатели Петербурга',
            },
            'params': {
                'skip_download': 'requires avconv',
            },
            'skip': 'This live event has ended.',
        },
        # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
        {
            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
            'info_dict': {
                'id': 'FIl7x6_3R5Y',
                'ext': 'webm',
                'title': 'md5:7b81415841e02ecd4313668cde88737a',
                'description': 'md5:116377fd2963b81ec4ce64b542173306',
                'duration': 220,
                'upload_date': '20150625',
                'uploader_id': 'dorappi2000',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                'uploader': 'dorappi2000',
                'formats': 'mincount:31',
            },
            'skip': 'not actual anymore',
        },
        # DASH manifest with segment_list
        {
            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
            'md5': '8ce563a1d667b599d21064e982ab9e31',
            'info_dict': {
                'id': 'CsmdDsKjzN8',
                'ext': 'mp4',
                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
                'uploader': 'Airtek',
                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '135',  # bestvideo
            },
            'skip': 'This live event has ended.',
        },
        {
            # Multifeed videos (multiple cameras), URL is for Main Camera
            'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
            'info_dict': {
                'id': 'jqWvoWXjCVs',
                'title': 'teamPGP: Rocket League Noob Stream',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
            },
            # One expected entry per camera feed (four in total)
            'playlist': [{
                'info_dict': {
                    'id': 'jqWvoWXjCVs',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7335,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': '6h8e8xoXJzg',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7337,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': 'PUOgX5z9xZw',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7337,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': 'teuwxikvS5k',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (zim)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7334,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }],
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
            'info_dict': {
                'id': 'gVfLd0zydlo',
                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
            },
            'playlist_count': 2,
            'skip': 'Not multifeed anymore',
        },
        {
            # URL on a host other than youtube.com
            'url': 'https://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
        },
        {
            # URL on a host other than youtube.com
            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
            'only_matching': True,
        },
        {
            # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            # Also tests cut-off URL expansion in video description (see
            # https://github.com/ytdl-org/youtube-dl/issues/1892,
            # https://github.com/ytdl-org/youtube-dl/issues/8164)
            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
            'info_dict': {
                'id': 'lsguqyKfVQg',
                'ext': 'mp4',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                'alt_title': 'Dark Walk - Position Music',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                'duration': 133,
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                'uploader': 'IronSoulElf',
                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'track': 'Dark Walk - Position Music',
                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
            'only_matching': True,
        },
        {
            # Video with yt:stretch=17:0
            'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
            'info_dict': {
                'id': 'Q39EVAstoRM',
                'ext': 'mp4',
                'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
                'description': 'md5:ee18a25c350637c8faff806845bddee9',
                'upload_date': '20151107',
                'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
                'uploader': 'CH GAMER DROID',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video does not exist.',
        },
        {
            # Video licensed under Creative Commons
            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
            'info_dict': {
                'id': 'M4gD1WSo5mA',
                'ext': 'mp4',
                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                'duration': 721,
                'upload_date': '20150127',
                'uploader_id': 'BerkmanCenter',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
                'uploader': 'The Berkman Klein Center for Internet & Society',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Channel-like uploader_url
            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
            'info_dict': {
                'id': 'eQcmzGIKrzg',
                'ext': 'mp4',
                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
                'description': 'md5:dda0d780d5a6e120758d1711d062a867',
                'duration': 4060,
                'upload_date': '20151119',
                'uploader': 'Bernie Sanders',
                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Query string contains a doubly HTML-escaped ampersand (&amp;amp;)
            'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
            'only_matching': True,
        },
        {
            # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
            'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
            'only_matching': True,
        },
        {
            # Rental video preview
            # (the expected 'id' below differs from the id in the URL)
            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
            'info_dict': {
                'id': 'uGpuVWrhIzE',
                'ext': 'mp4',
                'title': 'Piku - Trailer',
                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
                'upload_date': '20150811',
                'uploader': 'FlixMatrix',
                'uploader_id': 'FlixMatrixKaravan',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
                'license': 'Standard YouTube License',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # YouTube Red video with episode data
            'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
            'info_dict': {
                'id': 'iqKdEhx-dD4',
                'ext': 'mp4',
                'title': 'Isolation - Mind Field (Ep 1)',
                'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
                'duration': 2085,
                'upload_date': '20170118',
                'uploader': 'Vsauce',
                'uploader_id': 'Vsauce',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
                'series': 'Mind Field',
                'season_number': 1,
                'episode_number': 1,
            },
            'params': {
                'skip_download': True,
            },
            'expected_warnings': [
                'Skipping DASH manifest',
            ],
        },
        {
            # The following content has been identified by the YouTube community
            # as inappropriate or offensive to some audiences.
            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
            'info_dict': {
                'id': '6SJNVb0GnPI',
                'ext': 'mp4',
                'title': 'Race Differences in Intelligence',
                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
                'duration': 965,
                'upload_date': '20140124',
                'uploader': 'New Century Foundation',
                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # itag 212
            'url': '1t24XAntNCY',
            'only_matching': True,
        },
        {
            # geo restricted to JP
            'url': 'sJL6WA-aGkQ',
            'only_matching': True,
        },
        {
            # Watch URL with an additional list=RDMM parameter
            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
            'only_matching': True,
        },
        {
            # Watch URL on the invidio.us host
            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            # DRM protected
            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
            'only_matching': True,
        },
        {
            # Video with unsupported adaptive stream type formats
            'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
            'info_dict': {
                'id': 'Z4Vy8R84T1U',
                'ext': 'mp4',
                'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'duration': 433,
                'upload_date': '20130923',
                'uploader': 'Amelia Putri Harwita',
                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
                'formats': 'maxcount:10',
            },
            'params': {
                'skip_download': True,
                'youtube_include_dash_manifest': False,
            },
            'skip': 'not actual anymore',
        },
        {
            # YouTube Music auto-generated description
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'info_dict': {
                'id': 'MgNrAu2pzNs',
                'ext': 'mp4',
                'title': 'Voyeur Girl',
                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                'upload_date': '20190312',
                'uploader': 'Stephen - Topic',
                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
                'artist': 'Stephen',
                'track': 'Voyeur Girl',
                'album': 'it\'s too much love to know my dear',
                'release_date': '20190313',
                'release_year': 2019,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description
            # Retrieve 'artist' field from 'Artist:' in video description
            # when it is present on a YouTube Music video
            'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
            'info_dict': {
                'id': 'k0jLE7tTwjY',
                'ext': 'mp4',
                'title': 'Latch Feat. Sam Smith',
                'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
                'upload_date': '20150110',
                'uploader': 'Various Artists - Topic',
                'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
                'artist': 'Disclosure',
                'track': 'Latch Feat. Sam Smith',
                'album': 'Latch Featuring Sam Smith',
                'release_date': '20121008',
                'release_year': 2012,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description
            # Handle multiple artists on a YouTube Music video
            'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
            'info_dict': {
                'id': '74qn0eJSjpA',
                'ext': 'mp4',
                'title': 'Eastside',
                'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
                'upload_date': '20180710',
                'uploader': 'Benny Blanco - Topic',
                'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
                'artist': 'benny blanco, Halsey, Khalid',
                'track': 'Eastside',
                'album': 'Eastside',
                'release_date': '20180713',
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description
            # Handle a YouTube Music video with release_year and no release_date
            'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
            'info_dict': {
                'id': '-hcAI0g-f5M',
                'ext': 'mp4',
                'title': 'Put It On Me',
                'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
                'upload_date': '20180426',
                'uploader': 'Matt Maeson - Topic',
                'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
                'artist': 'Matt Maeson',
                'track': 'Put It On Me',
                'album': 'The Hearse',
                'release_date': None,
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Watch URL on the youtubekids.com host
            'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
            'only_matching': True,
        },
        {
            # invalid -> valid video id redirection
            # (the input id ends in 'l', the expected id ends in 'k')
            'url': 'DJztXj2GPfl',
            'info_dict': {
                'id': 'DJztXj2GPfk',
                'ext': 'mp4',
                'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
                'description': 'md5:bf577a41da97918e94fa9798d9228825',
                'upload_date': '20090125',
                'uploader': 'Prochorowka',
                'uploader_id': 'Prochorowka',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
                'artist': 'Panjabi MC',
                'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
                'album': 'Beware of the Boys (Mundian To Bach Ke)',
            },
            'params': {
                'skip_download': True,
            },
        }
    ]
1269
1270     def __init__(self, *args, **kwargs):
1271         super(YoutubeIE, self).__init__(*args, **kwargs)
1272         self._player_cache = {}
1273
1274     def report_video_info_webpage_download(self, video_id):
1275         """Report attempt to download video info webpage."""
1276         self.to_screen('%s: Downloading video info webpage' % video_id)
1277
1278     def report_information_extraction(self, video_id):
1279         """Report attempt to extract video information."""
1280         self.to_screen('%s: Extracting video information' % video_id)
1281
1282     def report_unavailable_format(self, video_id, format):
1283         """Report extracted video URL."""
1284         self.to_screen('%s: Format %s not available' % (video_id, format))
1285
1286     def report_rtmp_download(self):
1287         """Indicate the download will use the RTMP protocol."""
1288         self.to_screen('RTMP download detected')
1289
1290     def _signature_cache_id(self, example_sig):
1291         """ Return a string representation of a signature """
1292         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1293
1294     @classmethod
1295     def _extract_player_info(cls, player_url):
1296         for player_re in cls._PLAYER_INFO_RE:
1297             id_m = re.search(player_re, player_url)
1298             if id_m:
1299                 break
1300         else:
1301             raise ExtractorError('Cannot identify player %r' % player_url)
1302         return id_m.group('ext'), id_m.group('id')
1303
1304     def _extract_signature_function(self, video_id, player_url, example_sig):
1305         player_type, player_id = self._extract_player_info(player_url)
1306
1307         # Read from filesystem cache
1308         func_id = '%s_%s_%s' % (
1309             player_type, player_id, self._signature_cache_id(example_sig))
1310         assert os.path.basename(func_id) == func_id
1311
1312         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1313         if cache_spec is not None:
1314             return lambda s: ''.join(s[i] for i in cache_spec)
1315
1316         download_note = (
1317             'Downloading player %s' % player_url
1318             if self._downloader.params.get('verbose') else
1319             'Downloading %s player %s' % (player_type, player_id)
1320         )
1321         if player_type == 'js':
1322             code = self._download_webpage(
1323                 player_url, video_id,
1324                 note=download_note,
1325                 errnote='Download of %s failed' % player_url)
1326             res = self._parse_sig_js(code)
1327         elif player_type == 'swf':
1328             urlh = self._request_webpage(
1329                 player_url, video_id,
1330                 note=download_note,
1331                 errnote='Download of %s failed' % player_url)
1332             code = urlh.read()
1333             res = self._parse_sig_swf(code)
1334         else:
1335             assert False, 'Invalid player type %r' % player_type
1336
1337         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1338         cache_res = res(test_string)
1339         cache_spec = [ord(c) for c in cache_res]
1340
1341         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1342         return res
1343
1344     def _print_sig_code(self, func, example_sig):
1345         def gen_sig_code(idxs):
1346             def _genslice(start, end, step):
1347                 starts = '' if start == 0 else str(start)
1348                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1349                 steps = '' if step == 1 else (':%d' % step)
1350                 return 's[%s%s%s]' % (starts, ends, steps)
1351
1352             step = None
1353             # Quelch pyflakes warnings - start will be set when step is set
1354             start = '(Never used)'
1355             for i, prev in zip(idxs[1:], idxs[:-1]):
1356                 if step is not None:
1357                     if i - prev == step:
1358                         continue
1359                     yield _genslice(start, prev, step)
1360                     step = None
1361                     continue
1362                 if i - prev in [-1, 1]:
1363                     step = i - prev
1364                     start = prev
1365                     continue
1366                 else:
1367                     yield 's[%d]' % prev
1368             if step is None:
1369                 yield 's[%d]' % i
1370             else:
1371                 yield _genslice(start, i, step)
1372
1373         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1374         cache_res = func(test_string)
1375         cache_spec = [ord(c) for c in cache_res]
1376         expr_code = ' + '.join(gen_sig_code(cache_spec))
1377         signature_id_tuple = '(%s)' % (
1378             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1379         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1380                 '    return %s\n') % (signature_id_tuple, expr_code)
1381         self.to_screen('Extracted signature function:\n' + code)
1382
1383     def _parse_sig_js(self, jscode):
1384         funcname = self._search_regex(
1385             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1386              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1387              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1388              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1389              # Obsolete patterns
1390              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1391              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1392              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1393              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1394              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1395              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1396              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1397              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1398             jscode, 'Initial JS player signature function name', group='sig')
1399
1400         jsi = JSInterpreter(jscode)
1401         initial_function = jsi.extract_function(funcname)
1402         return lambda s: initial_function([s])
1403
1404     def _parse_sig_swf(self, file_contents):
1405         swfi = SWFInterpreter(file_contents)
1406         TARGET_CLASSNAME = 'SignatureDecipher'
1407         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1408         initial_function = swfi.extract_function(searched_class, 'decipher')
1409         return lambda s: initial_function([s])
1410
1411     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1412         """Turn the encrypted s field into a working signature"""
1413
1414         if player_url is None:
1415             raise ExtractorError('Cannot decrypt signature without player_url')
1416
1417         if player_url.startswith('//'):
1418             player_url = 'https:' + player_url
1419         elif not re.match(r'https?://', player_url):
1420             player_url = compat_urlparse.urljoin(
1421                 'https://www.youtube.com', player_url)
1422         try:
1423             player_id = (player_url, self._signature_cache_id(s))
1424             if player_id not in self._player_cache:
1425                 func = self._extract_signature_function(
1426                     video_id, player_url, s
1427                 )
1428                 self._player_cache[player_id] = func
1429             func = self._player_cache[player_id]
1430             if self._downloader.params.get('youtube_print_sig_code'):
1431                 self._print_sig_code(func, s)
1432             return func(s)
1433         except Exception as e:
1434             tb = traceback.format_exc()
1435             raise ExtractorError(
1436                 'Signature extraction failed: ' + tb, cause=e)
1437
1438     def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1439         try:
1440             subs_doc = self._download_xml(
1441                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1442                 video_id, note=False)
1443         except ExtractorError as err:
1444             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1445             return {}
1446
1447         sub_lang_list = {}
1448         for track in subs_doc.findall('track'):
1449             lang = track.attrib['lang_code']
1450             if lang in sub_lang_list:
1451                 continue
1452             sub_formats = []
1453             for ext in self._SUBTITLE_FORMATS:
1454                 params = compat_urllib_parse_urlencode({
1455                     'lang': lang,
1456                     'v': video_id,
1457                     'fmt': ext,
1458                     'name': track.attrib['name'].encode('utf-8'),
1459                 })
1460                 sub_formats.append({
1461                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1462                     'ext': ext,
1463                 })
1464             sub_lang_list[lang] = sub_formats
1465         if has_live_chat_replay:
1466             sub_lang_list['live_chat'] = [
1467                 {
1468                     'video_id': video_id,
1469                     'ext': 'json',
1470                     'protocol': 'youtube_live_chat_replay',
1471                 },
1472             ]
1473         if not sub_lang_list:
1474             self._downloader.report_warning('video doesn\'t have subtitles')
1475             return {}
1476         return sub_lang_list
1477
1478     def _get_ytplayer_config(self, video_id, webpage):
1479         patterns = (
1480             # User data may contain arbitrary character sequences that may affect
1481             # JSON extraction with regex, e.g. when '};' is contained the second
1482             # regex won't capture the whole JSON. Yet working around by trying more
1483             # concrete regex first keeping in mind proper quoted string handling
1484             # to be implemented in future that will replace this workaround (see
1485             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1486             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1487             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1488             r';ytplayer\.config\s*=\s*({.+?});',
1489         )
1490         config = self._search_regex(
1491             patterns, webpage, 'ytplayer.config', default=None)
1492         if config:
1493             return self._parse_json(
1494                 uppercase_escape(config), video_id, fatal=False)
1495
1496     def _get_yt_initial_data(self, video_id, webpage):
1497         config = self._search_regex(
1498             (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
1499              r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
1500             webpage, 'ytInitialData', default=None)
1501         if config:
1502             return self._parse_json(
1503                 uppercase_escape(config), video_id, fatal=False)
1504
    def _get_automatic_captions(self, video_id, webpage):
        """Build the automatic (translated) caption formats for a video.

        The already downloaded webpage is passed in so the player config
        can be read from it without another request.

        Returns a dict mapping a language code to a list of
        {'url': ..., 'ext': ...} dicts, one per entry of
        self._SUBTITLE_FORMATS.  Returns {} after emitting a warning when
        no automatic captions can be found.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        # Missing keys anywhere below are handled by the single except
        # clause at the end of this try block.
        try:
            args = player_config['args']
            # First variant: the config carries a 'ttsurl'
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One entry per 'target' language, each translated from the
                # original track's language
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Derive one URL per (language, format) pair from sub_url by
                # overriding its 'tlang' and 'fmt' query parameters.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    caption_tracks = renderer['captionTracks']
                    for caption_track in caption_tracks:
                        if 'kind' not in caption_track:
                            # not an automatic transcription
                            continue
                        base_url = caption_track['baseUrl']
                        sub_lang_list = []
                        for lang in renderer['translationLanguages']:
                            lang_code = lang.get('languageCode')
                            if lang_code:
                                sub_lang_list.append(lang_code)
                        # Only the first track that has a 'kind' is used
                        return make_captions(base_url, sub_lang_list)

                    # No track with a 'kind' was found
                    self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
                    return {}
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there
        # are no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1613
1614     def _mark_watched(self, video_id, video_info, player_response):
1615         playback_url = url_or_none(try_get(
1616             player_response,
1617             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1618             video_info, lambda x: x['videostats_playback_base_url'][0]))
1619         if not playback_url:
1620             return
1621         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1622         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1623
1624         # cpn generation algorithm is reverse engineered from base.js.
1625         # In fact it works even with dummy cpn.
1626         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1627         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1628
1629         qs.update({
1630             'ver': ['2'],
1631             'cpn': [cpn],
1632         })
1633         playback_url = compat_urlparse.urlunparse(
1634             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1635
1636         self._download_webpage(
1637             playback_url, video_id, 'Marking watched',
1638             'Unable to mark watched', fatal=False)
1639
1640     @staticmethod
1641     def _extract_urls(webpage):
1642         # Embedded YouTube player
1643         entries = [
1644             unescapeHTML(mobj.group('url'))
1645             for mobj in re.finditer(r'''(?x)
1646             (?:
1647                 <iframe[^>]+?src=|
1648                 data-video-url=|
1649                 <embed[^>]+?src=|
1650                 embedSWF\(?:\s*|
1651                 <object[^>]+data=|
1652                 new\s+SWFObject\(
1653             )
1654             (["\'])
1655                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1656                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1657             \1''', webpage)]
1658
1659         # lazyYT YouTube embed
1660         entries.extend(list(map(
1661             unescapeHTML,
1662             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1663
1664         # Wordpress "YouTube Video Importer" plugin
1665         matches = re.findall(r'''(?x)<div[^>]+
1666             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1667             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1668         entries.extend(m[-1] for m in matches)
1669
1670         return entries
1671
1672     @staticmethod
1673     def _extract_url(webpage):
1674         urls = YoutubeIE._extract_urls(webpage)
1675         return urls[0] if urls else None
1676
1677     @classmethod
1678     def extract_id(cls, url):
1679         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1680         if mobj is None:
1681             raise ExtractorError('Invalid URL: %s' % url)
1682         video_id = mobj.group(2)
1683         return video_id
1684
1685     def _extract_chapters_from_json(self, webpage, video_id, duration):
1686         if not webpage:
1687             return
1688         initial_data = self._parse_json(
1689             self._search_regex(
1690                 r'window\["ytInitialData"\] = (.+);\n', webpage,
1691                 'player args', default='{}'),
1692             video_id, fatal=False)
1693         if not initial_data or not isinstance(initial_data, dict):
1694             return
1695         chapters_list = try_get(
1696             initial_data,
1697             lambda x: x['playerOverlays']
1698                        ['playerOverlayRenderer']
1699                        ['decoratedPlayerBarRenderer']
1700                        ['decoratedPlayerBarRenderer']
1701                        ['playerBar']
1702                        ['chapteredPlayerBarRenderer']
1703                        ['chapters'],
1704             list)
1705         if not chapters_list:
1706             return
1707
1708         def chapter_time(chapter):
1709             return float_or_none(
1710                 try_get(
1711                     chapter,
1712                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1713                     int),
1714                 scale=1000)
1715         chapters = []
1716         for next_num, chapter in enumerate(chapters_list, start=1):
1717             start_time = chapter_time(chapter)
1718             if start_time is None:
1719                 continue
1720             end_time = (chapter_time(chapters_list[next_num])
1721                         if next_num < len(chapters_list) else duration)
1722             if end_time is None:
1723                 continue
1724             title = try_get(
1725                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1726                 compat_str)
1727             chapters.append({
1728                 'start_time': start_time,
1729                 'end_time': end_time,
1730                 'title': title,
1731             })
1732         return chapters
1733
1734     @staticmethod
1735     def _extract_chapters_from_description(description, duration):
1736         if not description:
1737             return None
1738         chapter_lines = re.findall(
1739             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1740             description)
1741         if not chapter_lines:
1742             return None
1743         chapters = []
1744         for next_num, (chapter_line, time_point) in enumerate(
1745                 chapter_lines, start=1):
1746             start_time = parse_duration(time_point)
1747             if start_time is None:
1748                 continue
1749             if start_time > duration:
1750                 break
1751             end_time = (duration if next_num == len(chapter_lines)
1752                         else parse_duration(chapter_lines[next_num][1]))
1753             if end_time is None:
1754                 continue
1755             if end_time > duration:
1756                 end_time = duration
1757             if start_time > end_time:
1758                 break
1759             chapter_title = re.sub(
1760                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1761             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1762             chapters.append({
1763                 'start_time': start_time,
1764                 'end_time': end_time,
1765                 'title': chapter_title,
1766             })
1767         return chapters
1768
1769     def _extract_chapters(self, webpage, description, video_id, duration):
1770         return (self._extract_chapters_from_json(webpage, video_id, duration)
1771                 or self._extract_chapters_from_description(description, duration))
1772
1773     def _real_extract(self, url):
1774         url, smuggled_data = unsmuggle_url(url, {})
1775
1776         proto = (
1777             'http' if self._downloader.params.get('prefer_insecure', False)
1778             else 'https')
1779
1780         start_time = None
1781         end_time = None
1782         parsed_url = compat_urllib_parse_urlparse(url)
1783         for component in [parsed_url.fragment, parsed_url.query]:
1784             query = compat_parse_qs(component)
1785             if start_time is None and 't' in query:
1786                 start_time = parse_duration(query['t'][0])
1787             if start_time is None and 'start' in query:
1788                 start_time = parse_duration(query['start'][0])
1789             if end_time is None and 'end' in query:
1790                 end_time = parse_duration(query['end'][0])
1791
1792         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1793         mobj = re.search(self._NEXT_URL_RE, url)
1794         if mobj:
1795             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1796         video_id = self.extract_id(url)
1797
1798         # Get video webpage
1799         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1800         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1801
1802         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1803         video_id = qs.get('v', [None])[0] or video_id
1804
1805         # Attempt to extract SWF player URL
1806         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1807         if mobj is not None:
1808             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1809         else:
1810             player_url = None
1811
1812         dash_mpds = []
1813
1814         def add_dash_mpd(video_info):
1815             dash_mpd = video_info.get('dashmpd')
1816             if dash_mpd and dash_mpd[0] not in dash_mpds:
1817                 dash_mpds.append(dash_mpd[0])
1818
1819         def add_dash_mpd_pr(pl_response):
1820             dash_mpd = url_or_none(try_get(
1821                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1822                 compat_str))
1823             if dash_mpd and dash_mpd not in dash_mpds:
1824                 dash_mpds.append(dash_mpd)
1825
1826         is_live = None
1827         view_count = None
1828
1829         def extract_view_count(v_info):
1830             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1831
1832         def extract_player_response(player_response, video_id):
1833             pl_response = str_or_none(player_response)
1834             if not pl_response:
1835                 return
1836             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1837             if isinstance(pl_response, dict):
1838                 add_dash_mpd_pr(pl_response)
1839                 return pl_response
1840
1841         player_response = {}
1842
1843         # Get video info
1844         video_info = {}
1845         embed_webpage = None
1846         if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1847                 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1848             age_gate = True
1849             # We simulate the access to the video from www.youtube.com/v/{video_id}
1850             # this can be viewed without login into Youtube
1851             url = proto + '://www.youtube.com/embed/%s' % video_id
1852             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1853             data = compat_urllib_parse_urlencode({
1854                 'video_id': video_id,
1855                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1856                 'sts': self._search_regex(
1857                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1858             })
1859             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1860             try:
1861                 video_info_webpage = self._download_webpage(
1862                     video_info_url, video_id,
1863                     note='Refetching age-gated info webpage',
1864                     errnote='unable to download video info webpage')
1865             except ExtractorError:
1866                 video_info_webpage = None
1867             if video_info_webpage:
1868                 video_info = compat_parse_qs(video_info_webpage)
1869                 pl_response = video_info.get('player_response', [None])[0]
1870                 player_response = extract_player_response(pl_response, video_id)
1871                 add_dash_mpd(video_info)
1872                 view_count = extract_view_count(video_info)
1873         else:
1874             age_gate = False
1875             # Try looking directly into the video webpage
1876             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1877             if ytplayer_config:
1878                 args = ytplayer_config['args']
1879                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1880                     # Convert to the same format returned by compat_parse_qs
1881                     video_info = dict((k, [v]) for k, v in args.items())
1882                     add_dash_mpd(video_info)
1883                 # Rental video is not rented but preview is available (e.g.
1884                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1885                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1886                 if not video_info and args.get('ypc_vid'):
1887                     return self.url_result(
1888                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1889                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1890                     is_live = True
1891                 if not player_response:
1892                     player_response = extract_player_response(args.get('player_response'), video_id)
1893             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1894                 add_dash_mpd_pr(player_response)
1895
1896         def extract_unavailable_message():
1897             messages = []
1898             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1899                 msg = self._html_search_regex(
1900                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1901                     video_webpage, 'unavailable %s' % kind, default=None)
1902                 if msg:
1903                     messages.append(msg)
1904             if messages:
1905                 return '\n'.join(messages)
1906
1907         if not video_info and not player_response:
1908             unavailable_message = extract_unavailable_message()
1909             if not unavailable_message:
1910                 unavailable_message = 'Unable to extract video data'
1911             raise ExtractorError(
1912                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1913
1914         if not isinstance(video_info, dict):
1915             video_info = {}
1916
1917         video_details = try_get(
1918             player_response, lambda x: x['videoDetails'], dict) or {}
1919
1920         microformat = try_get(
1921             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1922
1923         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1924         if not video_title:
1925             self._downloader.report_warning('Unable to extract video title')
1926             video_title = '_'
1927
1928         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1929         if video_description:
1930
1931             def replace_url(m):
1932                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1933                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1934                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1935                     qs = compat_parse_qs(parsed_redir_url.query)
1936                     q = qs.get('q')
1937                     if q and q[0]:
1938                         return q[0]
1939                 return redir_url
1940
1941             description_original = video_description = re.sub(r'''(?x)
1942                 <a\s+
1943                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1944                     (?:title|href)="([^"]+)"\s+
1945                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1946                     class="[^"]*"[^>]*>
1947                 [^<]+\.{3}\s*
1948                 </a>
1949             ''', replace_url, video_description)
1950             video_description = clean_html(video_description)
1951         else:
1952             video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage)
1953
1954         if not smuggled_data.get('force_singlefeed', False):
1955             if not self._downloader.params.get('noplaylist'):
1956                 multifeed_metadata_list = try_get(
1957                     player_response,
1958                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1959                     compat_str) or try_get(
1960                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1961                 if multifeed_metadata_list:
1962                     entries = []
1963                     feed_ids = []
1964                     for feed in multifeed_metadata_list.split(','):
1965                         # Unquote should take place before split on comma (,) since textual
1966                         # fields may contain comma as well (see
1967                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1968                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1969
1970                         def feed_entry(name):
1971                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1972
1973                         feed_id = feed_entry('id')
1974                         if not feed_id:
1975                             continue
1976                         feed_title = feed_entry('title')
1977                         title = video_title
1978                         if feed_title:
1979                             title += ' (%s)' % feed_title
1980                         entries.append({
1981                             '_type': 'url_transparent',
1982                             'ie_key': 'Youtube',
1983                             'url': smuggle_url(
1984                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1985                                 {'force_singlefeed': True}),
1986                             'title': title,
1987                         })
1988                         feed_ids.append(feed_id)
1989                     self.to_screen(
1990                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1991                         % (', '.join(feed_ids), video_id))
1992                     return self.playlist_result(entries, video_id, video_title, video_description)
1993             else:
1994                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1995
1996         if view_count is None:
1997             view_count = extract_view_count(video_info)
1998         if view_count is None and video_details:
1999             view_count = int_or_none(video_details.get('viewCount'))
2000         if view_count is None and microformat:
2001             view_count = int_or_none(microformat.get('viewCount'))
2002
2003         if is_live is None:
2004             is_live = bool_or_none(video_details.get('isLive'))
2005
2006         has_live_chat_replay = False
2007         if not is_live:
2008             yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2009             try:
2010                 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2011                 has_live_chat_replay = True
2012             except (KeyError, IndexError, TypeError):
2013                 pass
2014
2015         # Check for "rental" videos
2016         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2017             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2018
2019         def _extract_filesize(media_url):
2020             return int_or_none(self._search_regex(
2021                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2022
2023         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2024         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2025
2026         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2027             self.report_rtmp_download()
2028             formats = [{
2029                 'format_id': '_rtmp',
2030                 'protocol': 'rtmp',
2031                 'url': video_info['conn'][0],
2032                 'player_url': player_url,
2033             }]
2034         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2035             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2036             if 'rtmpe%3Dyes' in encoded_url_map:
2037                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2038             formats = []
2039             formats_spec = {}
2040             fmt_list = video_info.get('fmt_list', [''])[0]
2041             if fmt_list:
2042                 for fmt in fmt_list.split(','):
2043                     spec = fmt.split('/')
2044                     if len(spec) > 1:
2045                         width_height = spec[1].split('x')
2046                         if len(width_height) == 2:
2047                             formats_spec[spec[0]] = {
2048                                 'resolution': spec[1],
2049                                 'width': int_or_none(width_height[0]),
2050                                 'height': int_or_none(width_height[1]),
2051                             }
2052             for fmt in streaming_formats:
2053                 itag = str_or_none(fmt.get('itag'))
2054                 if not itag:
2055                     continue
2056                 quality = fmt.get('quality')
2057                 quality_label = fmt.get('qualityLabel') or quality
2058                 formats_spec[itag] = {
2059                     'asr': int_or_none(fmt.get('audioSampleRate')),
2060                     'filesize': int_or_none(fmt.get('contentLength')),
2061                     'format_note': quality_label,
2062                     'fps': int_or_none(fmt.get('fps')),
2063                     'height': int_or_none(fmt.get('height')),
2064                     # bitrate for itag 43 is always 2147483647
2065                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2066                     'width': int_or_none(fmt.get('width')),
2067                 }
2068
2069             for fmt in streaming_formats:
2070                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2071                     continue
2072                 url = url_or_none(fmt.get('url'))
2073
2074                 if not url:
2075                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2076                     if not cipher:
2077                         continue
2078                     url_data = compat_parse_qs(cipher)
2079                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2080                     if not url:
2081                         continue
2082                 else:
2083                     cipher = None
2084                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2085
2086                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2087                 # Unsupported FORMAT_STREAM_TYPE_OTF
2088                 if stream_type == 3:
2089                     continue
2090
2091                 format_id = fmt.get('itag') or url_data['itag'][0]
2092                 if not format_id:
2093                     continue
2094                 format_id = compat_str(format_id)
2095
2096                 if cipher:
2097                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2098                         ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2099                         jsplayer_url_json = self._search_regex(
2100                             ASSETS_RE,
2101                             embed_webpage if age_gate else video_webpage,
2102                             'JS player URL (1)', default=None)
2103                         if not jsplayer_url_json and not age_gate:
2104                             # We need the embed website after all
2105                             if embed_webpage is None:
2106                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2107                                 embed_webpage = self._download_webpage(
2108                                     embed_url, video_id, 'Downloading embed webpage')
2109                             jsplayer_url_json = self._search_regex(
2110                                 ASSETS_RE, embed_webpage, 'JS player URL')
2111
2112                         player_url = json.loads(jsplayer_url_json)
2113                         if player_url is None:
2114                             player_url_json = self._search_regex(
2115                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2116                                 video_webpage, 'age gate player URL')
2117                             player_url = json.loads(player_url_json)
2118
2119                     if 'sig' in url_data:
2120                         url += '&signature=' + url_data['sig'][0]
2121                     elif 's' in url_data:
2122                         encrypted_sig = url_data['s'][0]
2123
2124                         if self._downloader.params.get('verbose'):
2125                             if player_url is None:
2126                                 player_desc = 'unknown'
2127                             else:
2128                                 player_type, player_version = self._extract_player_info(player_url)
2129                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2130                             parts_sizes = self._signature_cache_id(encrypted_sig)
2131                             self.to_screen('{%s} signature length %s, %s' %
2132                                            (format_id, parts_sizes, player_desc))
2133
2134                         signature = self._decrypt_signature(
2135                             encrypted_sig, video_id, player_url, age_gate)
2136                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2137                         url += '&%s=%s' % (sp, signature)
2138                 if 'ratebypass' not in url:
2139                     url += '&ratebypass=yes'
2140
2141                 dct = {
2142                     'format_id': format_id,
2143                     'url': url,
2144                     'player_url': player_url,
2145                 }
2146                 if format_id in self._formats:
2147                     dct.update(self._formats[format_id])
2148                 if format_id in formats_spec:
2149                     dct.update(formats_spec[format_id])
2150
2151                 # Some itags are not included in DASH manifest thus corresponding formats will
2152                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2153                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2154                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2155                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2156
2157                 if width is None:
2158                     width = int_or_none(fmt.get('width'))
2159                 if height is None:
2160                     height = int_or_none(fmt.get('height'))
2161
2162                 filesize = int_or_none(url_data.get(
2163                     'clen', [None])[0]) or _extract_filesize(url)
2164
2165                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2166                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2167
2168                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2169                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2170                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2171
2172                 more_fields = {
2173                     'filesize': filesize,
2174                     'tbr': tbr,
2175                     'width': width,
2176                     'height': height,
2177                     'fps': fps,
2178                     'format_note': quality_label or quality,
2179                 }
2180                 for key, value in more_fields.items():
2181                     if value:
2182                         dct[key] = value
2183                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2184                 if type_:
2185                     type_split = type_.split(';')
2186                     kind_ext = type_split[0].split('/')
2187                     if len(kind_ext) == 2:
2188                         kind, _ = kind_ext
2189                         dct['ext'] = mimetype2ext(type_split[0])
2190                         if kind in ('audio', 'video'):
2191                             codecs = None
2192                             for mobj in re.finditer(
2193                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2194                                 if mobj.group('key') == 'codecs':
2195                                     codecs = mobj.group('val')
2196                                     break
2197                             if codecs:
2198                                 dct.update(parse_codecs(codecs))
2199                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2200                     dct['downloader_options'] = {
2201                         # Youtube throttles chunks >~10M
2202                         'http_chunk_size': 10485760,
2203                     }
2204                 formats.append(dct)
2205         else:
2206             manifest_url = (
2207                 url_or_none(try_get(
2208                     player_response,
2209                     lambda x: x['streamingData']['hlsManifestUrl'],
2210                     compat_str))
2211                 or url_or_none(try_get(
2212                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2213             if manifest_url:
2214                 formats = []
2215                 m3u8_formats = self._extract_m3u8_formats(
2216                     manifest_url, video_id, 'mp4', fatal=False)
2217                 for a_format in m3u8_formats:
2218                     itag = self._search_regex(
2219                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2220                     if itag:
2221                         a_format['format_id'] = itag
2222                         if itag in self._formats:
2223                             dct = self._formats[itag].copy()
2224                             dct.update(a_format)
2225                             a_format = dct
2226                     a_format['player_url'] = player_url
2227                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2228                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2229                     formats.append(a_format)
2230             else:
2231                 error_message = extract_unavailable_message()
2232                 if not error_message:
2233                     error_message = clean_html(try_get(
2234                         player_response, lambda x: x['playabilityStatus']['reason'],
2235                         compat_str))
2236                 if not error_message:
2237                     error_message = clean_html(
2238                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2239                 if error_message:
2240                     raise ExtractorError(error_message, expected=True)
2241                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2242
2243         # uploader
2244         video_uploader = try_get(
2245             video_info, lambda x: x['author'][0],
2246             compat_str) or str_or_none(video_details.get('author'))
2247         if video_uploader:
2248             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2249         else:
2250             self._downloader.report_warning('unable to extract uploader name')
2251
2252         # uploader_id
2253         video_uploader_id = None
2254         video_uploader_url = None
2255         mobj = re.search(
2256             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2257             video_webpage)
2258         if mobj is not None:
2259             video_uploader_id = mobj.group('uploader_id')
2260             video_uploader_url = mobj.group('uploader_url')
2261         else:
2262             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2263             if owner_profile_url:
2264                 video_uploader_id = self._search_regex(
2265                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2266                     default=None)
2267                 video_uploader_url = owner_profile_url
2268
2269         channel_id = (
2270             str_or_none(video_details.get('channelId'))
2271             or self._html_search_meta(
2272                 'channelId', video_webpage, 'channel id', default=None)
2273             or self._search_regex(
2274                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2275                 video_webpage, 'channel id', default=None, group='id'))
2276         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2277
2278         thumbnails = []
2279         thumbnails_list = try_get(
2280             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2281         for t in thumbnails_list:
2282             if not isinstance(t, dict):
2283                 continue
2284             thumbnail_url = url_or_none(t.get('url'))
2285             if not thumbnail_url:
2286                 continue
2287             thumbnails.append({
2288                 'url': thumbnail_url,
2289                 'width': int_or_none(t.get('width')),
2290                 'height': int_or_none(t.get('height')),
2291             })
2292
2293         if not thumbnails:
2294             video_thumbnail = None
2295             # We try first to get a high quality image:
2296             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2297                                 video_webpage, re.DOTALL)
2298             if m_thumb is not None:
2299                 video_thumbnail = m_thumb.group(1)
2300             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2301             if thumbnail_url:
2302                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2303             if video_thumbnail:
2304                 thumbnails.append({'url': video_thumbnail})
2305
2306         # upload date
2307         upload_date = self._html_search_meta(
2308             'datePublished', video_webpage, 'upload date', default=None)
2309         if not upload_date:
2310             upload_date = self._search_regex(
2311                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2312                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2313                 video_webpage, 'upload date', default=None)
2314         if not upload_date:
2315             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2316         upload_date = unified_strdate(upload_date)
2317
2318         video_license = self._html_search_regex(
2319             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2320             video_webpage, 'license', default=None)
2321
2322         m_music = re.search(
2323             r'''(?x)
2324                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2325                 <ul[^>]*>\s*
2326                 <li>(?P<title>.+?)
2327                 by (?P<creator>.+?)
2328                 (?:
2329                     \(.+?\)|
2330                     <a[^>]*
2331                         (?:
2332                             \bhref=["\']/red[^>]*>|             # drop possible
2333                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2334                         )
2335                     .*?
2336                 )?</li
2337             ''',
2338             video_webpage)
2339         if m_music:
2340             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2341             video_creator = clean_html(m_music.group('creator'))
2342         else:
2343             video_alt_title = video_creator = None
2344
2345         def extract_meta(field):
2346             return self._html_search_regex(
2347                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2348                 video_webpage, field, default=None)
2349
2350         track = extract_meta('Song')
2351         artist = extract_meta('Artist')
2352         album = extract_meta('Album')
2353
2354         # Youtube Music Auto-generated description
2355         release_date = release_year = None
2356         if video_description:
2357             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2358             if mobj:
2359                 if not track:
2360                     track = mobj.group('track').strip()
2361                 if not artist:
2362                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2363                 if not album:
2364                     album = mobj.group('album'.strip())
2365                 release_year = mobj.group('release_year')
2366                 release_date = mobj.group('release_date')
2367                 if release_date:
2368                     release_date = release_date.replace('-', '')
2369                     if not release_year:
2370                         release_year = int(release_date[:4])
2371                 if release_year:
2372                     release_year = int(release_year)
2373
2374         m_episode = re.search(
2375             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2376             video_webpage)
2377         if m_episode:
2378             series = unescapeHTML(m_episode.group('series'))
2379             season_number = int(m_episode.group('season'))
2380             episode_number = int(m_episode.group('episode'))
2381         else:
2382             series = season_number = episode_number = None
2383
2384         m_cat_container = self._search_regex(
2385             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2386             video_webpage, 'categories', default=None)
2387         category = None
2388         if m_cat_container:
2389             category = self._html_search_regex(
2390                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2391                 default=None)
2392         if not category:
2393             category = try_get(
2394                 microformat, lambda x: x['category'], compat_str)
2395         video_categories = None if category is None else [category]
2396
2397         video_tags = [
2398             unescapeHTML(m.group('content'))
2399             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2400         if not video_tags:
2401             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2402
2403         def _extract_count(count_name):
2404             return str_to_int(self._search_regex(
2405                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2406                 % re.escape(count_name),
2407                 video_webpage, count_name, default=None))
2408
2409         like_count = _extract_count('like')
2410         dislike_count = _extract_count('dislike')
2411
2412         if view_count is None:
2413             view_count = str_to_int(self._search_regex(
2414                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2415                 'view count', default=None))
2416
2417         average_rating = (
2418             float_or_none(video_details.get('averageRating'))
2419             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2420
2421         # subtitles
2422         video_subtitles = self.extract_subtitles(
2423             video_id, video_webpage, has_live_chat_replay)
2424         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2425
2426         video_duration = try_get(
2427             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2428         if not video_duration:
2429             video_duration = int_or_none(video_details.get('lengthSeconds'))
2430         if not video_duration:
2431             video_duration = parse_duration(self._html_search_meta(
2432                 'duration', video_webpage, 'video duration'))
2433
2434         # annotations
2435         video_annotations = None
2436         if self._downloader.params.get('writeannotations', False):
2437             xsrf_token = self._search_regex(
2438                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2439                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2440             invideo_url = try_get(
2441                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2442             if xsrf_token and invideo_url:
2443                 xsrf_field_name = self._search_regex(
2444                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2445                     video_webpage, 'xsrf field name',
2446                     group='xsrf_field_name', default='session_token')
2447                 video_annotations = self._download_webpage(
2448                     self._proto_relative_url(invideo_url),
2449                     video_id, note='Downloading annotations',
2450                     errnote='Unable to download video annotations', fatal=False,
2451                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2452
2453         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2454
2455         # Look for the DASH manifest
2456         if self._downloader.params.get('youtube_include_dash_manifest', True):
2457             dash_mpd_fatal = True
2458             for mpd_url in dash_mpds:
2459                 dash_formats = {}
2460                 try:
2461                     def decrypt_sig(mobj):
2462                         s = mobj.group(1)
2463                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2464                         return '/signature/%s' % dec_s
2465
2466                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2467
2468                     for df in self._extract_mpd_formats(
2469                             mpd_url, video_id, fatal=dash_mpd_fatal,
2470                             formats_dict=self._formats):
2471                         if not df.get('filesize'):
2472                             df['filesize'] = _extract_filesize(df['url'])
2473                         # Do not overwrite DASH format found in some previous DASH manifest
2474                         if df['format_id'] not in dash_formats:
2475                             dash_formats[df['format_id']] = df
2476                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2477                         # allow them to fail without bug report message if we already have
2478                         # some DASH manifest succeeded. This is temporary workaround to reduce
2479                         # burst of bug reports until we figure out the reason and whether it
2480                         # can be fixed at all.
2481                         dash_mpd_fatal = False
2482                 except (ExtractorError, KeyError) as e:
2483                     self.report_warning(
2484                         'Skipping DASH manifest: %r' % e, video_id)
2485                 if dash_formats:
2486                     # Remove the formats we found through non-DASH, they
2487                     # contain less info and it can be wrong, because we use
2488                     # fixed values (for example the resolution). See
2489                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2490                     # example.
2491                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2492                     formats.extend(dash_formats.values())
2493
2494         # Check for malformed aspect ratio
2495         stretched_m = re.search(
2496             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2497             video_webpage)
2498         if stretched_m:
2499             w = float(stretched_m.group('w'))
2500             h = float(stretched_m.group('h'))
2501             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2502             # We will only process correct ratios.
2503             if w > 0 and h > 0:
2504                 ratio = w / h
2505                 for f in formats:
2506                     if f.get('vcodec') != 'none':
2507                         f['stretched_ratio'] = ratio
2508
2509         if not formats:
2510             if 'reason' in video_info:
2511                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2512                     regions_allowed = self._html_search_meta(
2513                         'regionsAllowed', video_webpage, default=None)
2514                     countries = regions_allowed.split(',') if regions_allowed else None
2515                     self.raise_geo_restricted(
2516                         msg=video_info['reason'][0], countries=countries)
2517                 reason = video_info['reason'][0]
2518                 if 'Invalid parameters' in reason:
2519                     unavailable_message = extract_unavailable_message()
2520                     if unavailable_message:
2521                         reason = unavailable_message
2522                 raise ExtractorError(
2523                     'YouTube said: %s' % reason,
2524                     expected=True, video_id=video_id)
2525             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2526                 raise ExtractorError('This video is DRM protected.', expected=True)
2527
2528         self._sort_formats(formats)
2529
2530         self.mark_watched(video_id, video_info, player_response)
2531
2532         return {
2533             'id': video_id,
2534             'uploader': video_uploader,
2535             'uploader_id': video_uploader_id,
2536             'uploader_url': video_uploader_url,
2537             'channel_id': channel_id,
2538             'channel_url': channel_url,
2539             'upload_date': upload_date,
2540             'license': video_license,
2541             'creator': video_creator or artist,
2542             'title': video_title,
2543             'alt_title': video_alt_title or track,
2544             'thumbnails': thumbnails,
2545             'description': video_description,
2546             'categories': video_categories,
2547             'tags': video_tags,
2548             'subtitles': video_subtitles,
2549             'automatic_captions': automatic_captions,
2550             'duration': video_duration,
2551             'age_limit': 18 if age_gate else 0,
2552             'annotations': video_annotations,
2553             'chapters': chapters,
2554             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2555             'view_count': view_count,
2556             'like_count': like_count,
2557             'dislike_count': dislike_count,
2558             'average_rating': average_rating,
2559             'formats': formats,
2560             'is_live': is_live,
2561             'start_time': start_time,
2562             'end_time': end_time,
2563             'series': series,
2564             'season_number': season_number,
2565             'episode_number': episode_number,
2566             'track': track,
2567             'artist': artist,
2568             'album': album,
2569             'release_date': release_date,
2570             'release_year': release_year,
2571         }
2572
2573
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, mixes and bare playlist ids.

    A URL that carries both a video id and a playlist id is resolved either
    to the single video (``--no-playlist``, or when the playlist page turns
    out to contain no entries) or to the whole playlist.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Run the login step before any extraction takes place."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs found in a playlist page.

        Tags carrying a ``data-video-id`` attribute are read first; the
        class-level ``_VIDEO_RE`` and two looser patterns are then handed to
        ``extract_videos_from_page_impl`` together with the lists built so
        far. Returns ``zip(ids, titles)``; a title may be None.
        """
        video_ids = []
        video_titles = []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tag_re, page):
            tag_attrs = extract_attributes(tag)
            found_id = tag_attrs['data-video-id']
            found_title = unescapeHTML(tag_attrs.get('data-title'))
            video_ids.append(found_id)
            video_titles.append(found_title.strip() if found_title else found_title)

        # Old _VIDEO_RE first, then the relaxed patterns, in this exact order
        fallback_patterns = (
            self._VIDEO_RE,
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, video_ids, video_titles)

        return zip(video_ids, video_titles)

    def _extract_mix(self, playlist_id):
        """Extract a YouTube mix by walking its watch pages.

        A mix is generated from a single video; the playlist id is just
        'RD' + video_id, so the last 11 characters seed the first request.
        Each following request is seeded with the last id collected so far.
        """
        mix_ids = []
        seed_id = playlist_id[-11:]
        id_re = r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        for page_num in itertools.count(1):
            page_url = 'https://www.youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                page_url, playlist_id, 'Downloading page {0} of Youtube mix'.format(page_num))
            # Keep fetching until a page brings nothing new; it seems that
            # there are always 51 unique videos.
            fresh_ids = [
                candidate for candidate in orderedSet(re.findall(id_re, webpage))
                if candidate not in mix_ids]
            if not fresh_ids:
                break
            mix_ids.extend(fresh_ids)
            seed_id = mix_ids[-1]

        url_results = self._ids_to_results(mix_ids)

        # The title is taken from the last downloaded page: the first of
        # these CSS classes that yields content wins.
        title_html = None
        for css_class in ('playlist-title', 'title long-title', 'title'):
            title_html = get_element_by_attribute('class', css_class, webpage)
            if title_html:
                break

        return self.playlist_result(url_results, playlist_id, clean_html(title_html))

    def _extract_playlist(self, playlist_id):
        """Download a playlist page and build its playlist result.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False
        only when the page has no title and yields no entries. Raises
        ExtractorError when an alert says the playlist does not exist, is
        private, or that the parameters are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Missing or private playlists are fatal
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if uploader_link:
            uploader_id = uploader_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            try:
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update(
            uploader=uploader,
            uploader_id=uploader_id,
            uploader_url=uploader_url)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether a URL naming both a video and a playlist means the video.

        Returns ``(video_id, video_result)``: both None when the URL names
        no video, ``video_result`` None when the playlist should be
        downloaded, and a url_result for the video when ``noplaylist`` is set.
        """
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Resolve *url* to a single video, a mix, or a regular playlist."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        # Mixes require a custom extraction process
        if playlist_id.startswith(('RD', 'UL', 'PU')):
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
2935
2936
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube channel pages.

    When a 'UC...' channel id can be read from the channel page, extraction
    is delegated to the matching 'UU...' uploads playlist; otherwise the
    channel's video listing page is used directly.
    """
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-listing URL for *channel_id* (*url* is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferring its uploads playlist when it can be found."""
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to the app deep-link meta tags
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to address the uploads playlist
            uploads_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # An empty listing may be explained by an alert on the page
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3036
3037
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages ('/user/', '/c/', bare names and 'ytuser:')."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if no other Youtube*IE in this module accepts it."""
        # This extractor's regex is very permissive and would also match
        # URLs that belong to the other YouTube extractors, so give every
        # other Youtube*IE found in the module globals the first say.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the videos-listing URL from the path kind and id matched in *url*."""
        url_match = re.match(self._VALID_URL, url)
        # URLs without an explicit 'user' or 'c' segment are treated as 'user'
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
3095
3096
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for '<channel>/live' URLs."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the video behind a '/live' page, or defer to the base URL.

        The page must report an 'og:type' starting with 'video' and a
        well-formed 11-character 'videoId'; otherwise, or when the page
        cannot be downloaded, the URL without '/live' is returned as a
        url_result.
        """
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        points_at_video = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if points_at_video:
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3147
3148
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` tab of a user, channel or ``/c/`` page.

    Only the identifying attributes, the URL pattern and the tests are
    declared here; all extraction behaviour is inherited from the base class.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    # The path segment right before "/playlists" is captured as ``id``
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'ThirstForScience',
            },
        },
        {
            # with "Load more" button
            'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
            'skip': 'Blocked',
        },
        {
            'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
            'only_matching': True,
        },
    ]
3181
3182
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base of the search extractors; it only defines ``_VIDEO_RE``."""
    # Matches an href pointing at "/watch?v=<11-character id>" (group ``id``)
    # and, when a title="..." attribute follows inside the same tag,
    # additionally captures its value as group ``title``.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3185
3186
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Keyword search extractor (``ytsearch`` prefix) for YouTube.com.

    Subclasses may set ``_EXTRA_QUERY_ARGS`` to add parameters to the
    results URL query.
    """
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link of each page, until ``n`` entries have been collected, a page
        yields no entries, or no "Next" link is found.

        :param query: the search terms
        :param n: maximum number of entries to return (may be ``float('inf')``)
        :returns: a playlist result titled with ``query`` holding at most
            ``n`` entries
        :raises ExtractorError: if a result page contains the
            ``search-message`` marker (reported as "No video results")
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # ">=" rather than ">": once exactly n entries are in hand there
            # is nothing left to fetch, so do not download one more page
            # whose entries would all be sliced away below.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have overshot n; the guard also keeps an
        # infinite n away from the slice
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
3235
3236
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of ``YoutubeSearchIE`` that adds a sort argument to the query."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the results URL query by the inherited _get_n_results()
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
3242
3243
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``youtube.com/results?...`` search result URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # The value of the "search_query" (or "q") parameter is captured as ``query``
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the results page as given and return its entries as a playlist.

        The playlist title is the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
3264
3265
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``youtube.com/show/<id>`` pages."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base class using the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3283
3284
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, derived from the subclass' ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video id found in the feed.

        Starts with the already downloaded ``page`` and keeps following the
        "load more" link until a portion brings no unseen video id or no
        further link is present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set gives constant-time "already seen?" checks; the order of the
        # yielded entries is still determined by orderedSet() on each portion
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for uniqueness and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                headers=self._YOUTUBE_CLIENT_HEADERS)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download ``/feed/<_FEED_NAME>`` and return its entries as a playlist.

        The ``url`` argument itself is not used to build the request.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3337
3338
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list, addressed as playlist id ``WL``."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single video when the helper supplies one, else the list."""
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, playlist = self._extract_playlist('WL')
            return playlist
        return single_video
3358
3359
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that forwards the favourites page to the playlist extractor."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the ``list=`` value on ``/my_favorites`` and hand it on.

        The ``url`` argument is ignored; the fixed favourites page is always
        the one downloaded.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
3370
3371
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/recommended``."""
    # Feed settings consumed by YoutubeFeedsInfoExtractor
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3377
3378
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/subscriptions``."""
    # Feed settings consumed by YoutubeFeedsInfoExtractor
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'

    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    # Note: the keyword form matched here carries a leading colon
    # (":ytsubs" or ":ytsubscriptions")
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3384
3385
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``/feed/history``."""
    # Feed settings consumed by YoutubeFeedsInfoExtractor
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3391
3392
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id and explain why.

    ``_VALID_URL`` is anchored with ``$`` so that it only matches URLs that
    end right after one of the listed parameters; extraction always fails
    with a hint about quoting the URL.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
3440
3441
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` naming the short id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)