youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     bool_or_none,
  31     clean_html,
  32     error_to_compat_str,
  33     extract_attributes,
  34     ExtractorError,
  35     float_or_none,
  36     get_element_by_attribute,
  37     get_element_by_id,
  38     int_or_none,
  39     mimetype2ext,
  40     orderedSet,
  41     parse_codecs,
  42     parse_duration,
  43     remove_quotes,
  44     remove_start,
  45     smuggle_url,
  46     str_or_none,
  47     str_to_int,
  48     try_get,
  49     unescapeHTML,
  50     unified_strdate,
  51     unsmuggle_url,
  52     uppercase_escape,
  53     url_or_none,
  54     urlencode_postdata,
  55 )
  56
  57
  58 class YoutubeBaseInfoExtractor(InfoExtractor):
  59     """Provide base functions for Youtube extractors"""
  60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  62
  63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  66
  67     _NETRC_MACHINE = 'youtube'
  68     # If True it will raise an error if no login info is provided
  69     _LOGIN_REQUIRED = False
  70
  71     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  72
  73     _YOUTUBE_CLIENT_HEADERS = {
  74         'x-youtube-client-name': '1',
  75         'x-youtube-client-version': '1.20200609.04.02',
  76     }
  77
  78     def _set_language(self):
  79         self._set_cookie(
  80             '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
  81             # YouTube sets the expire time to about two months
  82             expire_time=time.time() + 2 * 30 * 24 * 3600)
  83
  84     def _ids_to_results(self, ids):
  85         return [
  86             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  87             for vid_id in ids]
  88
  89     def _login(self):
  90         """
  91         Attempt to log in to YouTube.
  92         True is returned if successful or skipped.
  93         False is returned if login failed.
  94
  95         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  96         """
  97         username, password = self._get_login_info()
  98         # No authentication to be performed
  99         if username is None:
 100             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
 101                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 102             return True
 103
 104         login_page = self._download_webpage(
 105             self._LOGIN_URL, None,
 106             note='Downloading login page',
 107             errnote='unable to fetch login page', fatal=False)
 108         if login_page is False:
 109             return
 110
 111         login_form = self._hidden_inputs(login_page)
 112
 113         def req(url, f_req, note, errnote):
 114             data = login_form.copy()
 115             data.update({
 116                 'pstMsg': 1,
 117                 'checkConnection': 'youtube',
 118                 'checkedDomains': 'youtube',
 119                 'hl': 'en',
 120                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 121                 'f.req': json.dumps(f_req),
 122                 'flowName': 'GlifWebSignIn',
 123                 'flowEntry': 'ServiceLogin',
 124                 # TODO: reverse actual botguard identifier generation algo
 125                 'bgRequest': '["identifier",""]',
 126             })
 127             return self._download_json(
 128                 url, None, note=note, errnote=errnote,
 129                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 130                 fatal=False,
 131                 data=urlencode_postdata(data), headers={
 132                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 133                     'Google-Accounts-XSRF': 1,
 134                 })
 135
 136         def warn(message):
 137             self._downloader.report_warning(message)
 138
 139         lookup_req = [
 140             username,
 141             None, [], None, 'US', None, None, 2, False, True,
 142             [
 143                 None, None,
 144                 [2, 1, None, 1,
 145                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 146                  None, [], 4],
 147                 1, [None, None, []], None, None, None, True
 148             ],
 149             username,
 150         ]
 151
 152         lookup_results = req(
 153             self._LOOKUP_URL, lookup_req,
 154             'Looking up account info', 'Unable to look up account info')
 155
 156         if lookup_results is False:
 157             return False
 158
 159         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 160         if not user_hash:
 161             warn('Unable to extract user hash')
 162             return False
 163
 164         challenge_req = [
 165             user_hash,
 166             None, 1, None, [1, None, None, None, [password, None, True]],
 167             [
 168                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 169                 1, [None, None, []], None, None, None, True
 170             ]]
 171
 172         challenge_results = req(
 173             self._CHALLENGE_URL, challenge_req,
 174             'Logging in', 'Unable to log in')
 175
 176         if challenge_results is False:
 177             return
 178
 179         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 180         if login_res:
 181             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 182             warn(
 183                 'Unable to login: %s' % 'Invalid password'
 184                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 185             return False
 186
 187         res = try_get(challenge_results, lambda x: x[0][-1], list)
 188         if not res:
 189             warn('Unable to extract result entry')
 190             return False
 191
 192         login_challenge = try_get(res, lambda x: x[0][0], list)
 193         if login_challenge:
 194             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 195             if challenge_str == 'TWO_STEP_VERIFICATION':
 196                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 197                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 198                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 199                 if status == 'QUOTA_EXCEEDED':
 200                     warn('Exceeded the limit of TFA codes, try later')
 201                     return False
 202
 203                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 204                 if not tl:
 205                     warn('Unable to extract TL')
 206                     return False
 207
 208                 tfa_code = self._get_tfa_info('2-step verification code')
 209
 210                 if not tfa_code:
 211                     warn(
 212                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 213                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 214                     return False
 215
 216                 tfa_code = remove_start(tfa_code, 'G-')
 217
 218                 tfa_req = [
 219                     user_hash, None, 2, None,
 220                     [
 221                         9, None, None, None, None, None, None, None,
 222                         [None, tfa_code, True, 2]
 223                     ]]
 224
 225                 tfa_results = req(
 226                     self._TFA_URL.format(tl), tfa_req,
 227                     'Submitting TFA code', 'Unable to submit TFA code')
 228
 229                 if tfa_results is False:
 230                     return False
 231
 232                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 233                 if tfa_res:
 234                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 235                     warn(
 236                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 237                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 238                     return False
 239
 240                 check_cookie_url = try_get(
 241                     tfa_results, lambda x: x[0][-1][2], compat_str)
 242             else:
 243                 CHALLENGES = {
 244                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 245                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 246                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 247                 }
 248                 challenge = CHALLENGES.get(
 249                     challenge_str,
 250                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 251                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 252                 return False
 253         else:
 254             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 255
 256         if not check_cookie_url:
 257             warn('Unable to extract CheckCookie URL')
 258             return False
 259
 260         check_cookie_results = self._download_webpage(
 261             check_cookie_url, None, 'Checking cookie', fatal=False)
 262
 263         if check_cookie_results is False:
 264             return False
 265
 266         if 'https://myaccount.google.com/' not in check_cookie_results:
 267             warn('Unable to log in')
 268             return False
 269
 270         return True
 271
 272     def _download_webpage_handle(self, *args, **kwargs):
 273         query = kwargs.get('query', {}).copy()
 274         query['disable_polymer'] = 'true'
 275         kwargs['query'] = query
 276         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 277             *args, **compat_kwargs(kwargs))
 278
 279     def _real_initialize(self):
 280         if self._downloader is None:
 281             return
 282         self._set_language()
 283         if not self._login():
 284             return
 285
 286
 287 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 288     # Extract entries from page with "Load more" button
 289     def _entries(self, page, playlist_id):
 290         more_widget_html = content_html = page
 291         for page_num in itertools.count(1):
 292             for entry in self._process_page(content_html):
 293                 yield entry
 294
 295             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 296             if not mobj:
 297                 break
 298
 299             count = 0
 300             retries = 3
 301             while count <= retries:
 302                 try:
 303                     # Downloading page may result in intermittent 5xx HTTP error
 304                     # that is usually worked around with a retry
 305                     more = self._download_json(
 306                         'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
 307                         'Downloading page #%s%s'
 308                         % (page_num, ' (retry #%d)' % count if count else ''),
 309                         transform_source=uppercase_escape,
 310                         headers=self._YOUTUBE_CLIENT_HEADERS)
 311                     break
 312                 except ExtractorError as e:
 313                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
 314                         count += 1
 315                         if count <= retries:
 316                             continue
 317                     raise
 318
 319             content_html = more['content_html']
 320             if not content_html.strip():
 321                 # Some webpages show a "Load more" button but they don't
 322                 # have more videos
 323                 break
 324             more_widget_html = more['load_more_widget_html']
 325
 326
 327 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 328     def _process_page(self, content):
 329         for video_id, video_title in self.extract_videos_from_page(content):
 330             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 331
 332     def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
 333         for mobj in re.finditer(video_re, page):
 334             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 335             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 336                 continue
 337             video_id = mobj.group('id')
 338             video_title = unescapeHTML(
 339                 mobj.group('title')) if 'title' in mobj.groupdict() else None
 340             if video_title:
 341                 video_title = video_title.strip()
 342             if video_title == '► Play all':
 343                 video_title = None
 344             try:
 345                 idx = ids_in_page.index(video_id)
 346                 if video_title and not titles_in_page[idx]:
 347                     titles_in_page[idx] = video_title
 348             except ValueError:
 349                 ids_in_page.append(video_id)
 350                 titles_in_page.append(video_title)
 351
 352     def extract_videos_from_page(self, page):
 353         ids_in_page = []
 354         titles_in_page = []
 355         self.extract_videos_from_page_impl(
 356             self._VIDEO_RE, page, ids_in_page, titles_in_page)
 357         return zip(ids_in_page, titles_in_page)
 358
 359
 360 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 361     def _process_page(self, content):
 362         for playlist_id in orderedSet(re.findall(
 363                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 364                 content)):
 365             yield self.url_result(
 366                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 367
 368     def _real_extract(self, url):
 369         playlist_id = self._match_id(url)
 370         webpage = self._download_webpage(url, playlist_id)
 371         title = self._og_search_title(webpage, fatal=False)
 372         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 373
 374
 375 class YoutubeIE(YoutubeBaseInfoExtractor):
 376     IE_DESC = 'YouTube.com'
 377     _VALID_URL = r"""(?x)^
 378                      (
 379                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 380                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
 381                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 382                             (?:www\.)?pwnyoutube\.com/|
 383                             (?:www\.)?hooktube\.com/|
 384                             (?:www\.)?yourepeat\.com/|
 385                             tube\.majestyc\.net/|
 386                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
 387                             (?:(?:www|dev)\.)?invidio\.us/|
 388                             (?:(?:www|no)\.)?invidiou\.sh/|
 389                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
 390                             (?:www\.)?invidious\.kabi\.tk/|
 391                             (?:www\.)?invidious\.13ad\.de/|
 392                             (?:www\.)?invidious\.mastodon\.host/|
 393                             (?:www\.)?invidious\.nixnet\.xyz/|
 394                             (?:www\.)?invidious\.drycat\.fr/|
 395                             (?:www\.)?tube\.poal\.co/|
 396                             (?:www\.)?vid\.wxzm\.sx/|
 397                             (?:www\.)?yewtu\.be/|
 398                             (?:www\.)?yt\.elukerio\.org/|
 399                             (?:www\.)?yt\.lelux\.fi/|
 400                             (?:www\.)?invidious\.ggc-project\.de/|
 401                             (?:www\.)?yt\.maisputain\.ovh/|
 402                             (?:www\.)?invidious\.13ad\.de/|
 403                             (?:www\.)?invidious\.toot\.koeln/|
 404                             (?:www\.)?invidious\.fdn\.fr/|
 405                             (?:www\.)?watch\.nettohikari\.com/|
 406                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
 407                             (?:www\.)?qklhadlycap4cnod\.onion/|
 408                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
 409                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
 410                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
 411                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
 412                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
 413                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
 414                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 415                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 416                          (?:                                                  # the various things that can precede the ID:
 417                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 418                              |(?:                                             # or the v= param in all its forms
 419                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 420                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 421                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 422                                  v=
 423                              )
 424                          ))
 425                          |(?:
 426                             youtu\.be|                                        # just youtu.be/xxxx
 427                             vid\.plus|                                        # or vid.plus/xxxx
 428                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 429                          )/
 430                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 431                          )
 432                      )?                                                       # all until now is optional -> you can pass the naked ID
 433                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 434                      (?!.*?\blist=
 435                         (?:
 436                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 437                             WL                                                # WL are handled by the watch later IE
 438                         )
 439                      )
 440                      (?(1).+)?                                                # if we found the ID, everything can follow
 441                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 442     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 443     _PLAYER_INFO_RE = (
 444         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 445         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 446     )
 447     _formats = {
 448         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 449         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 450         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 451         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 452         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 453         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 454         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 455         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 456         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 457         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 458         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 459         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 460         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 461         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 462         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 463         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 464         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 465         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 466
 467
 468         # 3D videos
 469         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 470         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 471         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 472         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 473         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 474         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 475         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 476
 477         # Apple HTTP Live Streaming
 478         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 479         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 480         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 481         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 482         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 483         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 484         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 485         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 486
 487         # DASH mp4 video
 488         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 489         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 490         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 491         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 492         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 493         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 494         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 495         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 496         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 497         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 498         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 499         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 500
 501         # Dash mp4 audio
 502         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 503         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 504         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 505         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 506         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 507         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 508         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 509
 510         # Dash webm
 511         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 512         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 513         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 514         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 515         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 516         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 517         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 518         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 519         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 520         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 521         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 522         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 523         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 524         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 525         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 526         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 527         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 528         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 529         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 530         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 531         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 532         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 533
 534         # Dash webm audio
 535         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 536         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 537
 538         # Dash webm audio with opus inside
 539         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 540         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 541         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 542
 543         # RTMP (unnamed)
 544         '_rtmp': {'protocol': 'rtmp'},
 545
 546         # av01 video only formats sometimes served with "unknown" codecs
 547         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 548         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 549         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 550         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 551     }
 552     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 553
 554     _GEO_BYPASS = False
 555
 556     IE_NAME = 'youtube'
    # Test fixtures for this extractor.
    # Every entry is a dict with a 'url' (a full URL, a protocol-relative URL
    # or a bare video id) and either 'only_matching': True or an 'info_dict'
    # of expected metadata (multi-video entries add 'playlist' or
    # 'playlist_count'). Optional keys seen below: 'note', 'md5', 'params',
    # 'expected_warnings' and 'skip'.
    # NOTE(review): the exact meaning of each key is defined by the test
    # harness, which is outside this chunk.
    _TESTS = [
        # Basic video; the t= and end= URL parameters are expected to show up
        # as start_time and end_time
        {
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
                'start_time': 1,
                'end_time': 9,
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'id': 'UxxajLWwzqY',
                'ext': 'mp4',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'alt_title': 'I Love It (feat. Charli XCX)',
                'description': 'md5:19a2f98d9032b9311e686ed039564f63',
                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                         'iconic ep', 'iconic', 'love', 'it'],
                'duration': 180,
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
                'creator': 'Icona Pop',
                'track': 'I Love It (feat. Charli XCX)',
                'artist': 'Icona Pop',
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
                'alt_title': 'Tunnel Vision',
                'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
                'duration': 419,
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
                'creator': 'Justin Timberlake',
                'track': 'Tunnel Vision',
                'artist': 'Justin Timberlake',
                'age_limit': 18,
            }
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
                'age_limit': 18,
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
            'skip': 'format 141 not served anymore',
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                'duration': 244,
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # JS player signature function name containing $
        {
            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
            'info_dict': {
                'id': 'nfWlot6h_JM',
                'ext': 'm4a',
                'title': 'Taylor Swift - Shake It Off',
                'description': 'md5:307195cd21ff7fa352270fe884570ef0',
                'duration': 242,
                'uploader': 'TaylorSwiftVEVO',
                'uploader_id': 'TaylorSwiftVEVO',
                'upload_date': '20140818',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # Controversy video
        {
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
            'info_dict': {
                'id': 'T4XJQO3qol8',
                'ext': 'mp4',
                'duration': 219,
                'upload_date': '20100909',
                'uploader': 'Amazing Atheist',
                'uploader_id': 'TheAmazingAtheist',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
        },
        # Normal age-gate video (No vevo, embed allowed)
        {
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'duration': 142,
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                'upload_date': '20140605',
                'age_limit': 18,
            },
        },
        # Age-gate video with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
            'info_dict': {
                'id': '6kLq3WMV1nU',
                'ext': 'mp4',
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                'duration': 246,
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
                'upload_date': '20110629',
                'age_limit': 18,
            },
        },
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
        # YouTube Red ad is not captured for creator
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
                'duration': 266,
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                'creator': 'Dada Life, deadmau5',
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
                'alt_title': 'This Machine Kills Some Chords',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
        },
        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'duration': 6085,
                'upload_date': '20150827',
                'uploader_id': 'olympic',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympic',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        },
        # Non-square pixels
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'id': '_b-2C3KPAM0',
                'ext': 'mp4',
                'stretched_ratio': 16 / 9.,
                'duration': 85,
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                'uploader': '孫ᄋᄅ',
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            },
        },
        # url_encoded_fmt_stream_map is empty string
        {
            'url': 'qEJwOuvDf7I',
            'info_dict': {
                'id': 'qEJwOuvDf7I',
                'ext': 'webm',
                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                'description': '',
                'upload_date': '20150404',
                'uploader_id': 'spbelect',
                'uploader': 'Наблюдатели Петербурга',
            },
            'params': {
                'skip_download': 'requires avconv',
            },
            'skip': 'This live event has ended.',
        },
        # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
        {
            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
            'info_dict': {
                'id': 'FIl7x6_3R5Y',
                'ext': 'webm',
                'title': 'md5:7b81415841e02ecd4313668cde88737a',
                'description': 'md5:116377fd2963b81ec4ce64b542173306',
                'duration': 220,
                'upload_date': '20150625',
                'uploader_id': 'dorappi2000',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                'uploader': 'dorappi2000',
                'formats': 'mincount:31',
            },
            'skip': 'not actual anymore',
        },
        # DASH manifest with segment_list
        {
            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
            'md5': '8ce563a1d667b599d21064e982ab9e31',
            'info_dict': {
                'id': 'CsmdDsKjzN8',
                'ext': 'mp4',
                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
                'uploader': 'Airtek',
                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '135',  # bestvideo
            },
            'skip': 'This live event has ended.',
        },
        {
            # Multifeed videos (multiple cameras), URL is for Main Camera
            'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
            'info_dict': {
                'id': 'jqWvoWXjCVs',
                'title': 'teamPGP: Rocket League Noob Stream',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
            },
            'playlist': [{
                'info_dict': {
                    'id': 'jqWvoWXjCVs',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7335,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': '6h8e8xoXJzg',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7337,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': 'PUOgX5z9xZw',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7337,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }, {
                'info_dict': {
                    'id': 'teuwxikvS5k',
                    'ext': 'mp4',
                    'title': 'teamPGP: Rocket League Noob Stream (zim)',
                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
                    'duration': 7334,
                    'upload_date': '20150721',
                    'uploader': 'Beer Games Beer',
                    'uploader_id': 'beergamesbeer',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
                    'license': 'Standard YouTube License',
                },
            }],
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
            'info_dict': {
                'id': 'gVfLd0zydlo',
                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
            },
            'playlist_count': 2,
            'skip': 'Not multifeed anymore',
        },
        {
            'url': 'https://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
        },
        {
            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
            'only_matching': True,
        },
        {
            # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            # Also tests cut-off URL expansion in video description (see
            # https://github.com/ytdl-org/youtube-dl/issues/1892,
            # https://github.com/ytdl-org/youtube-dl/issues/8164)
            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
            'info_dict': {
                'id': 'lsguqyKfVQg',
                'ext': 'mp4',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                'alt_title': 'Dark Walk - Position Music',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                'duration': 133,
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                'uploader': 'IronSoulElf',
                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'track': 'Dark Walk - Position Music',
                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
            'only_matching': True,
        },
        {
            # Video with yt:stretch=17:0
            'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
            'info_dict': {
                'id': 'Q39EVAstoRM',
                'ext': 'mp4',
                'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
                'description': 'md5:ee18a25c350637c8faff806845bddee9',
                'upload_date': '20151107',
                'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
                'uploader': 'CH GAMER DROID',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video does not exist.',
        },
        {
            # Video licensed under Creative Commons
            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
            'info_dict': {
                'id': 'M4gD1WSo5mA',
                'ext': 'mp4',
                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                'duration': 721,
                'upload_date': '20150127',
                'uploader_id': 'BerkmanCenter',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
                'uploader': 'The Berkman Klein Center for Internet & Society',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Channel-like uploader_url
            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
            'info_dict': {
                'id': 'eQcmzGIKrzg',
                'ext': 'mp4',
                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
                'description': 'md5:dda0d780d5a6e120758d1711d062a867',
                'duration': 4060,
                'upload_date': '20151119',
                'uploader': 'Bernie Sanders',
                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Query string whose '&' separator is HTML-escaped twice
            'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
            'only_matching': True,
        },
        {
            # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
            'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
            'only_matching': True,
        },
        {
            # Rental video preview
            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
            'info_dict': {
                'id': 'uGpuVWrhIzE',
                'ext': 'mp4',
                'title': 'Piku - Trailer',
                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
                'upload_date': '20150811',
                'uploader': 'FlixMatrix',
                'uploader_id': 'FlixMatrixKaravan',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
                'license': 'Standard YouTube License',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # YouTube Red video with episode data
            'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
            'info_dict': {
                'id': 'iqKdEhx-dD4',
                'ext': 'mp4',
                'title': 'Isolation - Mind Field (Ep 1)',
                'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
                'duration': 2085,
                'upload_date': '20170118',
                'uploader': 'Vsauce',
                'uploader_id': 'Vsauce',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
                'series': 'Mind Field',
                'season_number': 1,
                'episode_number': 1,
            },
            'params': {
                'skip_download': True,
            },
            'expected_warnings': [
                'Skipping DASH manifest',
            ],
        },
        {
            # The following content has been identified by the YouTube community
            # as inappropriate or offensive to some audiences.
            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
            'info_dict': {
                'id': '6SJNVb0GnPI',
                'ext': 'mp4',
                'title': 'Race Differences in Intelligence',
                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
                'duration': 965,
                'upload_date': '20140124',
                'uploader': 'New Century Foundation',
                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # itag 212
            'url': '1t24XAntNCY',
            'only_matching': True,
        },
        {
            # geo restricted to JP
            'url': 'sJL6WA-aGkQ',
            'only_matching': True,
        },
        {
            # Watch URL that also carries a list parameter
            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
            'only_matching': True,
        },
        {
            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            # DRM protected
            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
            'only_matching': True,
        },
        {
            # Video with unsupported adaptive stream type formats
            'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
            'info_dict': {
                'id': 'Z4Vy8R84T1U',
                'ext': 'mp4',
                'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'duration': 433,
                'upload_date': '20130923',
                'uploader': 'Amelia Putri Harwita',
                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
                'formats': 'maxcount:10',
            },
            'params': {
                'skip_download': True,
                'youtube_include_dash_manifest': False,
            },
            'skip': 'not actual anymore',
        },
        {
            # YouTube Music auto-generated description
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'info_dict': {
                'id': 'MgNrAu2pzNs',
                'ext': 'mp4',
                'title': 'Voyeur Girl',
                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                'upload_date': '20190312',
                'uploader': 'Stephen - Topic',
                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
                'artist': 'Stephen',
                'track': 'Voyeur Girl',
                'album': 'it\'s too much love to know my dear',
                'release_date': '20190313',
                'release_year': 2019,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description:
            # retrieve the 'artist' field from 'Artist:' in the video
            # description when it is present on a YouTube Music video
            'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
            'info_dict': {
                'id': 'k0jLE7tTwjY',
                'ext': 'mp4',
                'title': 'Latch Feat. Sam Smith',
                'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
                'upload_date': '20150110',
                'uploader': 'Various Artists - Topic',
                'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
                'artist': 'Disclosure',
                'track': 'Latch Feat. Sam Smith',
                'album': 'Latch Featuring Sam Smith',
                'release_date': '20121008',
                'release_year': 2012,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description:
            # handle multiple artists on a YouTube Music video
            'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
            'info_dict': {
                'id': '74qn0eJSjpA',
                'ext': 'mp4',
                'title': 'Eastside',
                'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
                'upload_date': '20180710',
                'uploader': 'Benny Blanco - Topic',
                'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
                'artist': 'benny blanco, Halsey, Khalid',
                'track': 'Eastside',
                'album': 'Eastside',
                'release_date': '20180713',
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # YouTube Music auto-generated description:
            # handle a YouTube Music video with release_year and no release_date
            'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
            'info_dict': {
                'id': '-hcAI0g-f5M',
                'ext': 'mp4',
                'title': 'Put It On Me',
                'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
                'upload_date': '20180426',
                'uploader': 'Matt Maeson - Topic',
                'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
                'artist': 'Matt Maeson',
                'track': 'Put It On Me',
                'album': 'The Hearse',
                'release_date': None,
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
            'only_matching': True,
        },
        {
            # invalid -> valid video id redirection
            'url': 'DJztXj2GPfl',
            'info_dict': {
                'id': 'DJztXj2GPfk',
                'ext': 'mp4',
                'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
                'description': 'md5:bf577a41da97918e94fa9798d9228825',
                'upload_date': '20090125',
                'uploader': 'Prochorowka',
                'uploader_id': 'Prochorowka',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
                'artist': 'Panjabi MC',
                'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
                'album': 'Beware of the Boys (Mundian To Bach Ke)',
            },
            'params': {
                'skip_download': True,
            },
        }
    ]
1269
1270     def __init__(self, *args, **kwargs):
1271         super(YoutubeIE, self).__init__(*args, **kwargs)
1272         self._player_cache = {}
1273
1274     def report_video_info_webpage_download(self, video_id):
1275         """Report attempt to download video info webpage."""
1276         self.to_screen('%s: Downloading video info webpage' % video_id)
1277
1278     def report_information_extraction(self, video_id):
1279         """Report attempt to extract video information."""
1280         self.to_screen('%s: Extracting video information' % video_id)
1281
1282     def report_unavailable_format(self, video_id, format):
1283         """Report extracted video URL."""
1284         self.to_screen('%s: Format %s not available' % (video_id, format))
1285
1286     def report_rtmp_download(self):
1287         """Indicate the download will use the RTMP protocol."""
1288         self.to_screen('RTMP download detected')
1289
1290     def _signature_cache_id(self, example_sig):
1291         """ Return a string representation of a signature """
1292         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1293
1294     @classmethod
1295     def _extract_player_info(cls, player_url):
1296         for player_re in cls._PLAYER_INFO_RE:
1297             id_m = re.search(player_re, player_url)
1298             if id_m:
1299                 break
1300         else:
1301             raise ExtractorError('Cannot identify player %r' % player_url)
1302         return id_m.group('ext'), id_m.group('id')
1303
1304     def _extract_signature_function(self, video_id, player_url, example_sig):
1305         player_type, player_id = self._extract_player_info(player_url)
1306
1307         # Read from filesystem cache
1308         func_id = '%s_%s_%s' % (
1309             player_type, player_id, self._signature_cache_id(example_sig))
1310         assert os.path.basename(func_id) == func_id
1311
1312         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1313         if cache_spec is not None:
1314             return lambda s: ''.join(s[i] for i in cache_spec)
1315
1316         download_note = (
1317             'Downloading player %s' % player_url
1318             if self._downloader.params.get('verbose') else
1319             'Downloading %s player %s' % (player_type, player_id)
1320         )
1321         if player_type == 'js':
1322             code = self._download_webpage(
1323                 player_url, video_id,
1324                 note=download_note,
1325                 errnote='Download of %s failed' % player_url)
1326             res = self._parse_sig_js(code)
1327         elif player_type == 'swf':
1328             urlh = self._request_webpage(
1329                 player_url, video_id,
1330                 note=download_note,
1331                 errnote='Download of %s failed' % player_url)
1332             code = urlh.read()
1333             res = self._parse_sig_swf(code)
1334         else:
1335             assert False, 'Invalid player type %r' % player_type
1336
1337         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1338         cache_res = res(test_string)
1339         cache_spec = [ord(c) for c in cache_res]
1340
1341         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1342         return res
1343
1344     def _print_sig_code(self, func, example_sig):
1345         def gen_sig_code(idxs):
1346             def _genslice(start, end, step):
1347                 starts = '' if start == 0 else str(start)
1348                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1349                 steps = '' if step == 1 else (':%d' % step)
1350                 return 's[%s%s%s]' % (starts, ends, steps)
1351
1352             step = None
1353             # Quelch pyflakes warnings - start will be set when step is set
1354             start = '(Never used)'
1355             for i, prev in zip(idxs[1:], idxs[:-1]):
1356                 if step is not None:
1357                     if i - prev == step:
1358                         continue
1359                     yield _genslice(start, prev, step)
1360                     step = None
1361                     continue
1362                 if i - prev in [-1, 1]:
1363                     step = i - prev
1364                     start = prev
1365                     continue
1366                 else:
1367                     yield 's[%d]' % prev
1368             if step is None:
1369                 yield 's[%d]' % i
1370             else:
1371                 yield _genslice(start, i, step)
1372
1373         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1374         cache_res = func(test_string)
1375         cache_spec = [ord(c) for c in cache_res]
1376         expr_code = ' + '.join(gen_sig_code(cache_spec))
1377         signature_id_tuple = '(%s)' % (
1378             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1379         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1380                 '    return %s\n') % (signature_id_tuple, expr_code)
1381         self.to_screen('Extracted signature function:\n' + code)
1382
1383     def _parse_sig_js(self, jscode):
1384         funcname = self._search_regex(
1385             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1386              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1387              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1388              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1389              # Obsolete patterns
1390              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1391              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1392              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1393              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1394              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1395              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1396              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1397              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1398             jscode, 'Initial JS player signature function name', group='sig')
1399
1400         jsi = JSInterpreter(jscode)
1401         initial_function = jsi.extract_function(funcname)
1402         return lambda s: initial_function([s])
1403
1404     def _parse_sig_swf(self, file_contents):
1405         swfi = SWFInterpreter(file_contents)
1406         TARGET_CLASSNAME = 'SignatureDecipher'
1407         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1408         initial_function = swfi.extract_function(searched_class, 'decipher')
1409         return lambda s: initial_function([s])
1410
1411     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1412         """Turn the encrypted s field into a working signature"""
1413
1414         if player_url is None:
1415             raise ExtractorError('Cannot decrypt signature without player_url')
1416
1417         if player_url.startswith('//'):
1418             player_url = 'https:' + player_url
1419         elif not re.match(r'https?://', player_url):
1420             player_url = compat_urlparse.urljoin(
1421                 'https://www.youtube.com', player_url)
1422         try:
1423             player_id = (player_url, self._signature_cache_id(s))
1424             if player_id not in self._player_cache:
1425                 func = self._extract_signature_function(
1426                     video_id, player_url, s
1427                 )
1428                 self._player_cache[player_id] = func
1429             func = self._player_cache[player_id]
1430             if self._downloader.params.get('youtube_print_sig_code'):
1431                 self._print_sig_code(func, s)
1432             return func(s)
1433         except Exception as e:
1434             tb = traceback.format_exc()
1435             raise ExtractorError(
1436                 'Signature extraction failed: ' + tb, cause=e)
1437
1438     def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1439         try:
1440             subs_doc = self._download_xml(
1441                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1442                 video_id, note=False)
1443         except ExtractorError as err:
1444             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1445             return {}
1446
1447         sub_lang_list = {}
1448         for track in subs_doc.findall('track'):
1449             lang = track.attrib['lang_code']
1450             if lang in sub_lang_list:
1451                 continue
1452             sub_formats = []
1453             for ext in self._SUBTITLE_FORMATS:
1454                 params = compat_urllib_parse_urlencode({
1455                     'lang': lang,
1456                     'v': video_id,
1457                     'fmt': ext,
1458                     'name': track.attrib['name'].encode('utf-8'),
1459                 })
1460                 sub_formats.append({
1461                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1462                     'ext': ext,
1463                 })
1464             sub_lang_list[lang] = sub_formats
1465         if has_live_chat_replay:
1466             sub_lang_list['live_chat'] = [
1467                 {
1468                     'video_id': video_id,
1469                     'ext': 'json',
1470                     'protocol': 'youtube_live_chat_replay',
1471                 },
1472             ]
1473         if not sub_lang_list:
1474             self._downloader.report_warning('video doesn\'t have subtitles')
1475             return {}
1476         return sub_lang_list
1477
1478     def _get_ytplayer_config(self, video_id, webpage):
1479         patterns = (
1480             # User data may contain arbitrary character sequences that may affect
1481             # JSON extraction with regex, e.g. when '};' is contained the second
1482             # regex won't capture the whole JSON. Yet working around by trying more
1483             # concrete regex first keeping in mind proper quoted string handling
1484             # to be implemented in future that will replace this workaround (see
1485             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1486             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1487             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1488             r';ytplayer\.config\s*=\s*({.+?});',
1489         )
1490         config = self._search_regex(
1491             patterns, webpage, 'ytplayer.config', default=None)
1492         if config:
1493             return self._parse_json(
1494                 uppercase_escape(config), video_id, fatal=False)
1495
1496     def _get_yt_initial_data(self, video_id, webpage):
1497         config = self._search_regex(
1498             (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
1499              r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
1500             webpage, 'ytInitialData', default=None)
1501         if config:
1502             return self._parse_json(
1503                 uppercase_escape(config), video_id, fatal=False)
1504
    def _get_automatic_captions(self, video_id, webpage):
        """Return automatic captions as a dict of language code -> format list.

        The already downloaded webpage is passed in so that the player config
        can be read from it without another request.  Three sources are tried
        in order, all taken from the player config 'args':

        1. 'ttsurl': the caption list is downloaded and one entry is built per
           <target> node;
        2. 'player_response' (a JSON string): the first caption track's
           baseUrl combined with every translation language;
        3. 'caption_tracks' / 'caption_translation_languages' (legacy).

        Every language maps to a list of {'url', 'ext'} dicts, one per entry
        in self._SUBTITLE_FORMATS.  A warning is reported and {} returned when
        the player config is missing, the video has no caption track, or a
        KeyError, IndexError or ExtractorError occurs along the way.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The first <track> node supplies the source language and kind
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                # Every <target> node is a language the track can be translated to
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build the captions dict from one base caption URL by setting
                # the 'tlang' and 'fmt' query parameters for every language
                # and subtitle format combination.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        # Rebinding sub_url is harmless: the base URL was
                        # already parsed into parsed_sub_url above
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    # Missing keys raise KeyError/IndexError, handled below
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            # Only the first comma-separated track is used; its 'u' field is the URL
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1606
1607     def _mark_watched(self, video_id, video_info, player_response):
1608         playback_url = url_or_none(try_get(
1609             player_response,
1610             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1611             video_info, lambda x: x['videostats_playback_base_url'][0]))
1612         if not playback_url:
1613             return
1614         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1615         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1616
1617         # cpn generation algorithm is reverse engineered from base.js.
1618         # In fact it works even with dummy cpn.
1619         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1620         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1621
1622         qs.update({
1623             'ver': ['2'],
1624             'cpn': [cpn],
1625         })
1626         playback_url = compat_urlparse.urlunparse(
1627             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1628
1629         self._download_webpage(
1630             playback_url, video_id, 'Marking watched',
1631             'Unable to mark watched', fatal=False)
1632
1633     @staticmethod
1634     def _extract_urls(webpage):
1635         # Embedded YouTube player
1636         entries = [
1637             unescapeHTML(mobj.group('url'))
1638             for mobj in re.finditer(r'''(?x)
1639             (?:
1640                 <iframe[^>]+?src=|
1641                 data-video-url=|
1642                 <embed[^>]+?src=|
1643                 embedSWF\(?:\s*|
1644                 <object[^>]+data=|
1645                 new\s+SWFObject\(
1646             )
1647             (["\'])
1648                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1649                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1650             \1''', webpage)]
1651
1652         # lazyYT YouTube embed
1653         entries.extend(list(map(
1654             unescapeHTML,
1655             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1656
1657         # Wordpress "YouTube Video Importer" plugin
1658         matches = re.findall(r'''(?x)<div[^>]+
1659             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1660             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1661         entries.extend(m[-1] for m in matches)
1662
1663         return entries
1664
1665     @staticmethod
1666     def _extract_url(webpage):
1667         urls = YoutubeIE._extract_urls(webpage)
1668         return urls[0] if urls else None
1669
1670     @classmethod
1671     def extract_id(cls, url):
1672         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1673         if mobj is None:
1674             raise ExtractorError('Invalid URL: %s' % url)
1675         video_id = mobj.group(2)
1676         return video_id
1677
1678     def _extract_chapters_from_json(self, webpage, video_id, duration):
1679         if not webpage:
1680             return
1681         initial_data = self._parse_json(
1682             self._search_regex(
1683                 r'window\["ytInitialData"\] = (.+);\n', webpage,
1684                 'player args', default='{}'),
1685             video_id, fatal=False)
1686         if not initial_data or not isinstance(initial_data, dict):
1687             return
1688         chapters_list = try_get(
1689             initial_data,
1690             lambda x: x['playerOverlays']
1691                        ['playerOverlayRenderer']
1692                        ['decoratedPlayerBarRenderer']
1693                        ['decoratedPlayerBarRenderer']
1694                        ['playerBar']
1695                        ['chapteredPlayerBarRenderer']
1696                        ['chapters'],
1697             list)
1698         if not chapters_list:
1699             return
1700
1701         def chapter_time(chapter):
1702             return float_or_none(
1703                 try_get(
1704                     chapter,
1705                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1706                     int),
1707                 scale=1000)
1708         chapters = []
1709         for next_num, chapter in enumerate(chapters_list, start=1):
1710             start_time = chapter_time(chapter)
1711             if start_time is None:
1712                 continue
1713             end_time = (chapter_time(chapters_list[next_num])
1714                         if next_num < len(chapters_list) else duration)
1715             if end_time is None:
1716                 continue
1717             title = try_get(
1718                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1719                 compat_str)
1720             chapters.append({
1721                 'start_time': start_time,
1722                 'end_time': end_time,
1723                 'title': title,
1724             })
1725         return chapters
1726
1727     @staticmethod
1728     def _extract_chapters_from_description(description, duration):
1729         if not description:
1730             return None
1731         chapter_lines = re.findall(
1732             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1733             description)
1734         if not chapter_lines:
1735             return None
1736         chapters = []
1737         for next_num, (chapter_line, time_point) in enumerate(
1738                 chapter_lines, start=1):
1739             start_time = parse_duration(time_point)
1740             if start_time is None:
1741                 continue
1742             if start_time > duration:
1743                 break
1744             end_time = (duration if next_num == len(chapter_lines)
1745                         else parse_duration(chapter_lines[next_num][1]))
1746             if end_time is None:
1747                 continue
1748             if end_time > duration:
1749                 end_time = duration
1750             if start_time > end_time:
1751                 break
1752             chapter_title = re.sub(
1753                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1754             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1755             chapters.append({
1756                 'start_time': start_time,
1757                 'end_time': end_time,
1758                 'title': chapter_title,
1759             })
1760         return chapters
1761
1762     def _extract_chapters(self, webpage, description, video_id, duration):
1763         return (self._extract_chapters_from_json(webpage, video_id, duration)
1764                 or self._extract_chapters_from_description(description, duration))
1765
1766     def _real_extract(self, url):
1767         url, smuggled_data = unsmuggle_url(url, {})
1768
1769         proto = (
1770             'http' if self._downloader.params.get('prefer_insecure', False)
1771             else 'https')
1772
1773         start_time = None
1774         end_time = None
1775         parsed_url = compat_urllib_parse_urlparse(url)
1776         for component in [parsed_url.fragment, parsed_url.query]:
1777             query = compat_parse_qs(component)
1778             if start_time is None and 't' in query:
1779                 start_time = parse_duration(query['t'][0])
1780             if start_time is None and 'start' in query:
1781                 start_time = parse_duration(query['start'][0])
1782             if end_time is None and 'end' in query:
1783                 end_time = parse_duration(query['end'][0])
1784
1785         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1786         mobj = re.search(self._NEXT_URL_RE, url)
1787         if mobj:
1788             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1789         video_id = self.extract_id(url)
1790
1791         # Get video webpage
1792         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1793         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1794
1795         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1796         video_id = qs.get('v', [None])[0] or video_id
1797
1798         # Attempt to extract SWF player URL
1799         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1800         if mobj is not None:
1801             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1802         else:
1803             player_url = None
1804
1805         dash_mpds = []
1806
1807         def add_dash_mpd(video_info):
1808             dash_mpd = video_info.get('dashmpd')
1809             if dash_mpd and dash_mpd[0] not in dash_mpds:
1810                 dash_mpds.append(dash_mpd[0])
1811
1812         def add_dash_mpd_pr(pl_response):
1813             dash_mpd = url_or_none(try_get(
1814                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1815                 compat_str))
1816             if dash_mpd and dash_mpd not in dash_mpds:
1817                 dash_mpds.append(dash_mpd)
1818
1819         is_live = None
1820         view_count = None
1821
1822         def extract_view_count(v_info):
1823             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1824
1825         def extract_player_response(player_response, video_id):
1826             pl_response = str_or_none(player_response)
1827             if not pl_response:
1828                 return
1829             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1830             if isinstance(pl_response, dict):
1831                 add_dash_mpd_pr(pl_response)
1832                 return pl_response
1833
1834         player_response = {}
1835
1836         # Get video info
1837         video_info = {}
1838         embed_webpage = None
1839         if self._html_search_meta('og:restrictions:age', video_webpage, default=None) == "18+":
1840             age_gate = True
1841             # We simulate the access to the video from www.youtube.com/v/{video_id}
1842             # this can be viewed without login into Youtube
1843             url = proto + '://www.youtube.com/embed/%s' % video_id
1844             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1845             data = compat_urllib_parse_urlencode({
1846                 'video_id': video_id,
1847                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1848                 'sts': self._search_regex(
1849                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1850             })
1851             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1852             try:
1853                 video_info_webpage = self._download_webpage(
1854                     video_info_url, video_id,
1855                     note='Refetching age-gated info webpage',
1856                     errnote='unable to download video info webpage')
1857             except ExtractorError:
1858                 video_info_webpage = None
1859             if video_info_webpage:
1860                 video_info = compat_parse_qs(video_info_webpage)
1861                 pl_response = video_info.get('player_response', [None])[0]
1862                 player_response = extract_player_response(pl_response, video_id)
1863                 add_dash_mpd(video_info)
1864                 view_count = extract_view_count(video_info)
1865         else:
1866             age_gate = False
1867             # Try looking directly into the video webpage
1868             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1869             if ytplayer_config:
1870                 args = ytplayer_config['args']
1871                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1872                     # Convert to the same format returned by compat_parse_qs
1873                     video_info = dict((k, [v]) for k, v in args.items())
1874                     add_dash_mpd(video_info)
1875                 # Rental video is not rented but preview is available (e.g.
1876                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1877                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1878                 if not video_info and args.get('ypc_vid'):
1879                     return self.url_result(
1880                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1881                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1882                     is_live = True
1883                 if not player_response:
1884                     player_response = extract_player_response(args.get('player_response'), video_id)
1885             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1886                 add_dash_mpd_pr(player_response)
1887
1888         def extract_unavailable_message():
1889             messages = []
1890             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1891                 msg = self._html_search_regex(
1892                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1893                     video_webpage, 'unavailable %s' % kind, default=None)
1894                 if msg:
1895                     messages.append(msg)
1896             if messages:
1897                 return '\n'.join(messages)
1898
1899         if not video_info and not player_response:
1900             unavailable_message = extract_unavailable_message()
1901             if not unavailable_message:
1902                 unavailable_message = 'Unable to extract video data'
1903             raise ExtractorError(
1904                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1905
1906         if not isinstance(video_info, dict):
1907             video_info = {}
1908
1909         video_details = try_get(
1910             player_response, lambda x: x['videoDetails'], dict) or {}
1911
1912         microformat = try_get(
1913             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1914
1915         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1916         if not video_title:
1917             self._downloader.report_warning('Unable to extract video title')
1918             video_title = '_'
1919
1920         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1921         if video_description:
1922
1923             def replace_url(m):
1924                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1925                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1926                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1927                     qs = compat_parse_qs(parsed_redir_url.query)
1928                     q = qs.get('q')
1929                     if q and q[0]:
1930                         return q[0]
1931                 return redir_url
1932
1933             description_original = video_description = re.sub(r'''(?x)
1934                 <a\s+
1935                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1936                     (?:title|href)="([^"]+)"\s+
1937                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1938                     class="[^"]*"[^>]*>
1939                 [^<]+\.{3}\s*
1940                 </a>
1941             ''', replace_url, video_description)
1942             video_description = clean_html(video_description)
1943         else:
1944             video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage)
1945
1946         if not smuggled_data.get('force_singlefeed', False):
1947             if not self._downloader.params.get('noplaylist'):
1948                 multifeed_metadata_list = try_get(
1949                     player_response,
1950                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1951                     compat_str) or try_get(
1952                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1953                 if multifeed_metadata_list:
1954                     entries = []
1955                     feed_ids = []
1956                     for feed in multifeed_metadata_list.split(','):
1957                         # Unquote should take place before split on comma (,) since textual
1958                         # fields may contain comma as well (see
1959                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1960                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1961
1962                         def feed_entry(name):
1963                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1964
1965                         feed_id = feed_entry('id')
1966                         if not feed_id:
1967                             continue
1968                         feed_title = feed_entry('title')
1969                         title = video_title
1970                         if feed_title:
1971                             title += ' (%s)' % feed_title
1972                         entries.append({
1973                             '_type': 'url_transparent',
1974                             'ie_key': 'Youtube',
1975                             'url': smuggle_url(
1976                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1977                                 {'force_singlefeed': True}),
1978                             'title': title,
1979                         })
1980                         feed_ids.append(feed_id)
1981                     self.to_screen(
1982                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1983                         % (', '.join(feed_ids), video_id))
1984                     return self.playlist_result(entries, video_id, video_title, video_description)
1985             else:
1986                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1987
1988         if view_count is None:
1989             view_count = extract_view_count(video_info)
1990         if view_count is None and video_details:
1991             view_count = int_or_none(video_details.get('viewCount'))
1992         if view_count is None and microformat:
1993             view_count = int_or_none(microformat.get('viewCount'))
1994
1995         if is_live is None:
1996             is_live = bool_or_none(video_details.get('isLive'))
1997
1998         has_live_chat_replay = False
1999         if not is_live:
2000             yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2001             try:
2002                 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2003                 has_live_chat_replay = True
2004             except (KeyError, IndexError, TypeError):
2005                 pass
2006
2007         # Check for "rental" videos
2008         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2009             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2010
2011         def _extract_filesize(media_url):
2012             return int_or_none(self._search_regex(
2013                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2014
2015         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2016         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2017
2018         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2019             self.report_rtmp_download()
2020             formats = [{
2021                 'format_id': '_rtmp',
2022                 'protocol': 'rtmp',
2023                 'url': video_info['conn'][0],
2024                 'player_url': player_url,
2025             }]
2026         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2027             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2028             if 'rtmpe%3Dyes' in encoded_url_map:
2029                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2030             formats = []
2031             formats_spec = {}
2032             fmt_list = video_info.get('fmt_list', [''])[0]
2033             if fmt_list:
2034                 for fmt in fmt_list.split(','):
2035                     spec = fmt.split('/')
2036                     if len(spec) > 1:
2037                         width_height = spec[1].split('x')
2038                         if len(width_height) == 2:
2039                             formats_spec[spec[0]] = {
2040                                 'resolution': spec[1],
2041                                 'width': int_or_none(width_height[0]),
2042                                 'height': int_or_none(width_height[1]),
2043                             }
2044             for fmt in streaming_formats:
2045                 itag = str_or_none(fmt.get('itag'))
2046                 if not itag:
2047                     continue
2048                 quality = fmt.get('quality')
2049                 quality_label = fmt.get('qualityLabel') or quality
2050                 formats_spec[itag] = {
2051                     'asr': int_or_none(fmt.get('audioSampleRate')),
2052                     'filesize': int_or_none(fmt.get('contentLength')),
2053                     'format_note': quality_label,
2054                     'fps': int_or_none(fmt.get('fps')),
2055                     'height': int_or_none(fmt.get('height')),
2056                     # bitrate for itag 43 is always 2147483647
2057                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2058                     'width': int_or_none(fmt.get('width')),
2059                 }
2060
2061             for fmt in streaming_formats:
2062                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2063                     continue
2064                 url = url_or_none(fmt.get('url'))
2065
2066                 if not url:
2067                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2068                     if not cipher:
2069                         continue
2070                     url_data = compat_parse_qs(cipher)
2071                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2072                     if not url:
2073                         continue
2074                 else:
2075                     cipher = None
2076                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2077
2078                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2079                 # Unsupported FORMAT_STREAM_TYPE_OTF
2080                 if stream_type == 3:
2081                     continue
2082
2083                 format_id = fmt.get('itag') or url_data['itag'][0]
2084                 if not format_id:
2085                     continue
2086                 format_id = compat_str(format_id)
2087
2088                 if cipher:
2089                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2090                         ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2091                         jsplayer_url_json = self._search_regex(
2092                             ASSETS_RE,
2093                             embed_webpage if age_gate else video_webpage,
2094                             'JS player URL (1)', default=None)
2095                         if not jsplayer_url_json and not age_gate:
2096                             # We need the embed website after all
2097                             if embed_webpage is None:
2098                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2099                                 embed_webpage = self._download_webpage(
2100                                     embed_url, video_id, 'Downloading embed webpage')
2101                             jsplayer_url_json = self._search_regex(
2102                                 ASSETS_RE, embed_webpage, 'JS player URL')
2103
2104                         player_url = json.loads(jsplayer_url_json)
2105                         if player_url is None:
2106                             player_url_json = self._search_regex(
2107                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2108                                 video_webpage, 'age gate player URL')
2109                             player_url = json.loads(player_url_json)
2110
2111                     if 'sig' in url_data:
2112                         url += '&signature=' + url_data['sig'][0]
2113                     elif 's' in url_data:
2114                         encrypted_sig = url_data['s'][0]
2115
2116                         if self._downloader.params.get('verbose'):
2117                             if player_url is None:
2118                                 player_desc = 'unknown'
2119                             else:
2120                                 player_type, player_version = self._extract_player_info(player_url)
2121                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2122                             parts_sizes = self._signature_cache_id(encrypted_sig)
2123                             self.to_screen('{%s} signature length %s, %s' %
2124                                            (format_id, parts_sizes, player_desc))
2125
2126                         signature = self._decrypt_signature(
2127                             encrypted_sig, video_id, player_url, age_gate)
2128                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2129                         url += '&%s=%s' % (sp, signature)
2130                 if 'ratebypass' not in url:
2131                     url += '&ratebypass=yes'
2132
2133                 dct = {
2134                     'format_id': format_id,
2135                     'url': url,
2136                     'player_url': player_url,
2137                 }
2138                 if format_id in self._formats:
2139                     dct.update(self._formats[format_id])
2140                 if format_id in formats_spec:
2141                     dct.update(formats_spec[format_id])
2142
2143                 # Some itags are not included in DASH manifest thus corresponding formats will
2144                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2145                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2146                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2147                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2148
2149                 if width is None:
2150                     width = int_or_none(fmt.get('width'))
2151                 if height is None:
2152                     height = int_or_none(fmt.get('height'))
2153
2154                 filesize = int_or_none(url_data.get(
2155                     'clen', [None])[0]) or _extract_filesize(url)
2156
2157                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2158                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2159
2160                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2161                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2162                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2163
2164                 more_fields = {
2165                     'filesize': filesize,
2166                     'tbr': tbr,
2167                     'width': width,
2168                     'height': height,
2169                     'fps': fps,
2170                     'format_note': quality_label or quality,
2171                 }
2172                 for key, value in more_fields.items():
2173                     if value:
2174                         dct[key] = value
2175                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2176                 if type_:
2177                     type_split = type_.split(';')
2178                     kind_ext = type_split[0].split('/')
2179                     if len(kind_ext) == 2:
2180                         kind, _ = kind_ext
2181                         dct['ext'] = mimetype2ext(type_split[0])
2182                         if kind in ('audio', 'video'):
2183                             codecs = None
2184                             for mobj in re.finditer(
2185                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2186                                 if mobj.group('key') == 'codecs':
2187                                     codecs = mobj.group('val')
2188                                     break
2189                             if codecs:
2190                                 dct.update(parse_codecs(codecs))
2191                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2192                     dct['downloader_options'] = {
2193                         # Youtube throttles chunks >~10M
2194                         'http_chunk_size': 10485760,
2195                     }
2196                 formats.append(dct)
2197         else:
2198             manifest_url = (
2199                 url_or_none(try_get(
2200                     player_response,
2201                     lambda x: x['streamingData']['hlsManifestUrl'],
2202                     compat_str))
2203                 or url_or_none(try_get(
2204                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2205             if manifest_url:
2206                 formats = []
2207                 m3u8_formats = self._extract_m3u8_formats(
2208                     manifest_url, video_id, 'mp4', fatal=False)
2209                 for a_format in m3u8_formats:
2210                     itag = self._search_regex(
2211                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2212                     if itag:
2213                         a_format['format_id'] = itag
2214                         if itag in self._formats:
2215                             dct = self._formats[itag].copy()
2216                             dct.update(a_format)
2217                             a_format = dct
2218                     a_format['player_url'] = player_url
2219                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2220                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2221                     formats.append(a_format)
2222             else:
2223                 error_message = extract_unavailable_message()
2224                 if not error_message:
2225                     error_message = clean_html(try_get(
2226                         player_response, lambda x: x['playabilityStatus']['reason'],
2227                         compat_str))
2228                 if not error_message:
2229                     error_message = clean_html(
2230                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2231                 if error_message:
2232                     raise ExtractorError(error_message, expected=True)
2233                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2234
2235         # uploader
2236         video_uploader = try_get(
2237             video_info, lambda x: x['author'][0],
2238             compat_str) or str_or_none(video_details.get('author'))
2239         if video_uploader:
2240             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2241         else:
2242             self._downloader.report_warning('unable to extract uploader name')
2243
2244         # uploader_id
2245         video_uploader_id = None
2246         video_uploader_url = None
2247         mobj = re.search(
2248             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2249             video_webpage)
2250         if mobj is not None:
2251             video_uploader_id = mobj.group('uploader_id')
2252             video_uploader_url = mobj.group('uploader_url')
2253         else:
2254             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2255             if owner_profile_url:
2256                 video_uploader_id = self._search_regex(
2257                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2258                     default=None)
2259                 video_uploader_url = owner_profile_url
2260
2261         channel_id = (
2262             str_or_none(video_details.get('channelId'))
2263             or self._html_search_meta(
2264                 'channelId', video_webpage, 'channel id', default=None)
2265             or self._search_regex(
2266                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2267                 video_webpage, 'channel id', default=None, group='id'))
2268         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2269
2270         thumbnails = []
2271         thumbnails_list = try_get(
2272             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2273         for t in thumbnails_list:
2274             if not isinstance(t, dict):
2275                 continue
2276             thumbnail_url = url_or_none(t.get('url'))
2277             if not thumbnail_url:
2278                 continue
2279             thumbnails.append({
2280                 'url': thumbnail_url,
2281                 'width': int_or_none(t.get('width')),
2282                 'height': int_or_none(t.get('height')),
2283             })
2284
2285         if not thumbnails:
2286             video_thumbnail = None
2287             # We try first to get a high quality image:
2288             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2289                                 video_webpage, re.DOTALL)
2290             if m_thumb is not None:
2291                 video_thumbnail = m_thumb.group(1)
2292             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2293             if thumbnail_url:
2294                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2295             if video_thumbnail:
2296                 thumbnails.append({'url': video_thumbnail})
2297
2298         # upload date
2299         upload_date = self._html_search_meta(
2300             'datePublished', video_webpage, 'upload date', default=None)
2301         if not upload_date:
2302             upload_date = self._search_regex(
2303                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2304                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2305                 video_webpage, 'upload date', default=None)
2306         if not upload_date:
2307             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2308         upload_date = unified_strdate(upload_date)
2309
2310         video_license = self._html_search_regex(
2311             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2312             video_webpage, 'license', default=None)
2313
2314         m_music = re.search(
2315             r'''(?x)
2316                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2317                 <ul[^>]*>\s*
2318                 <li>(?P<title>.+?)
2319                 by (?P<creator>.+?)
2320                 (?:
2321                     \(.+?\)|
2322                     <a[^>]*
2323                         (?:
2324                             \bhref=["\']/red[^>]*>|             # drop possible
2325                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2326                         )
2327                     .*?
2328                 )?</li
2329             ''',
2330             video_webpage)
2331         if m_music:
2332             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2333             video_creator = clean_html(m_music.group('creator'))
2334         else:
2335             video_alt_title = video_creator = None
2336
2337         def extract_meta(field):
2338             return self._html_search_regex(
2339                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2340                 video_webpage, field, default=None)
2341
2342         track = extract_meta('Song')
2343         artist = extract_meta('Artist')
2344         album = extract_meta('Album')
2345
2346         # Youtube Music Auto-generated description
2347         release_date = release_year = None
2348         if video_description:
2349             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2350             if mobj:
2351                 if not track:
2352                     track = mobj.group('track').strip()
2353                 if not artist:
2354                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2355                 if not album:
2356                     album = mobj.group('album'.strip())
2357                 release_year = mobj.group('release_year')
2358                 release_date = mobj.group('release_date')
2359                 if release_date:
2360                     release_date = release_date.replace('-', '')
2361                     if not release_year:
2362                         release_year = int(release_date[:4])
2363                 if release_year:
2364                     release_year = int(release_year)
2365
2366         m_episode = re.search(
2367             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2368             video_webpage)
2369         if m_episode:
2370             series = unescapeHTML(m_episode.group('series'))
2371             season_number = int(m_episode.group('season'))
2372             episode_number = int(m_episode.group('episode'))
2373         else:
2374             series = season_number = episode_number = None
2375
2376         m_cat_container = self._search_regex(
2377             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2378             video_webpage, 'categories', default=None)
2379         category = None
2380         if m_cat_container:
2381             category = self._html_search_regex(
2382                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2383                 default=None)
2384         if not category:
2385             category = try_get(
2386                 microformat, lambda x: x['category'], compat_str)
2387         video_categories = None if category is None else [category]
2388
2389         video_tags = [
2390             unescapeHTML(m.group('content'))
2391             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2392         if not video_tags:
2393             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2394
2395         def _extract_count(count_name):
2396             return str_to_int(self._search_regex(
2397                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2398                 % re.escape(count_name),
2399                 video_webpage, count_name, default=None))
2400
2401         like_count = _extract_count('like')
2402         dislike_count = _extract_count('dislike')
2403
2404         if view_count is None:
2405             view_count = str_to_int(self._search_regex(
2406                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2407                 'view count', default=None))
2408
2409         average_rating = (
2410             float_or_none(video_details.get('averageRating'))
2411             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2412
2413         # subtitles
2414         video_subtitles = self.extract_subtitles(
2415             video_id, video_webpage, has_live_chat_replay)
2416         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2417
2418         video_duration = try_get(
2419             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2420         if not video_duration:
2421             video_duration = int_or_none(video_details.get('lengthSeconds'))
2422         if not video_duration:
2423             video_duration = parse_duration(self._html_search_meta(
2424                 'duration', video_webpage, 'video duration'))
2425
2426         # annotations
2427         video_annotations = None
2428         if self._downloader.params.get('writeannotations', False):
2429             xsrf_token = self._search_regex(
2430                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2431                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2432             invideo_url = try_get(
2433                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2434             if xsrf_token and invideo_url:
2435                 xsrf_field_name = self._search_regex(
2436                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2437                     video_webpage, 'xsrf field name',
2438                     group='xsrf_field_name', default='session_token')
2439                 video_annotations = self._download_webpage(
2440                     self._proto_relative_url(invideo_url),
2441                     video_id, note='Downloading annotations',
2442                     errnote='Unable to download video annotations', fatal=False,
2443                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2444
2445         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2446
2447         # Look for the DASH manifest
2448         if self._downloader.params.get('youtube_include_dash_manifest', True):
2449             dash_mpd_fatal = True
2450             for mpd_url in dash_mpds:
2451                 dash_formats = {}
2452                 try:
2453                     def decrypt_sig(mobj):
2454                         s = mobj.group(1)
2455                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2456                         return '/signature/%s' % dec_s
2457
2458                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2459
2460                     for df in self._extract_mpd_formats(
2461                             mpd_url, video_id, fatal=dash_mpd_fatal,
2462                             formats_dict=self._formats):
2463                         if not df.get('filesize'):
2464                             df['filesize'] = _extract_filesize(df['url'])
2465                         # Do not overwrite DASH format found in some previous DASH manifest
2466                         if df['format_id'] not in dash_formats:
2467                             dash_formats[df['format_id']] = df
2468                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2469                         # allow them to fail without bug report message if we already have
2470                         # some DASH manifest succeeded. This is temporary workaround to reduce
2471                         # burst of bug reports until we figure out the reason and whether it
2472                         # can be fixed at all.
2473                         dash_mpd_fatal = False
2474                 except (ExtractorError, KeyError) as e:
2475                     self.report_warning(
2476                         'Skipping DASH manifest: %r' % e, video_id)
2477                 if dash_formats:
2478                     # Remove the formats we found through non-DASH, they
2479                     # contain less info and it can be wrong, because we use
2480                     # fixed values (for example the resolution). See
2481                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2482                     # example.
2483                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2484                     formats.extend(dash_formats.values())
2485
2486         # Check for malformed aspect ratio
2487         stretched_m = re.search(
2488             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2489             video_webpage)
2490         if stretched_m:
2491             w = float(stretched_m.group('w'))
2492             h = float(stretched_m.group('h'))
2493             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2494             # We will only process correct ratios.
2495             if w > 0 and h > 0:
2496                 ratio = w / h
2497                 for f in formats:
2498                     if f.get('vcodec') != 'none':
2499                         f['stretched_ratio'] = ratio
2500
2501         if not formats:
2502             if 'reason' in video_info:
2503                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2504                     regions_allowed = self._html_search_meta(
2505                         'regionsAllowed', video_webpage, default=None)
2506                     countries = regions_allowed.split(',') if regions_allowed else None
2507                     self.raise_geo_restricted(
2508                         msg=video_info['reason'][0], countries=countries)
2509                 reason = video_info['reason'][0]
2510                 if 'Invalid parameters' in reason:
2511                     unavailable_message = extract_unavailable_message()
2512                     if unavailable_message:
2513                         reason = unavailable_message
2514                 raise ExtractorError(
2515                     'YouTube said: %s' % reason,
2516                     expected=True, video_id=video_id)
2517             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2518                 raise ExtractorError('This video is DRM protected.', expected=True)
2519
2520         self._sort_formats(formats)
2521
2522         self.mark_watched(video_id, video_info, player_response)
2523
2524         return {
2525             'id': video_id,
2526             'uploader': video_uploader,
2527             'uploader_id': video_uploader_id,
2528             'uploader_url': video_uploader_url,
2529             'channel_id': channel_id,
2530             'channel_url': channel_url,
2531             'upload_date': upload_date,
2532             'license': video_license,
2533             'creator': video_creator or artist,
2534             'title': video_title,
2535             'alt_title': video_alt_title or track,
2536             'thumbnails': thumbnails,
2537             'description': video_description,
2538             'categories': video_categories,
2539             'tags': video_tags,
2540             'subtitles': video_subtitles,
2541             'automatic_captions': automatic_captions,
2542             'duration': video_duration,
2543             'age_limit': 18 if age_gate else 0,
2544             'annotations': video_annotations,
2545             'chapters': chapters,
2546             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2547             'view_count': view_count,
2548             'like_count': like_count,
2549             'dislike_count': dislike_count,
2550             'average_rating': average_rating,
2551             'formats': formats,
2552             'is_live': is_live,
2553             'start_time': start_time,
2554             'end_time': end_time,
2555             'series': series,
2556             'season_number': season_number,
2557             'episode_number': episode_number,
2558             'track': track,
2559             'artist': artist,
2560             'album': album,
2561             'release_date': release_date,
2562             'release_year': release_year,
2563         }
2564
2565
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists given as a playlist URL or a bare playlist id.

    URLs that carry both a video id and a playlist id are resolved either to
    the single video (when the ``noplaylist`` param is set, or when the
    playlist turns out to hold no entries) or to the playlist itself.
    """

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Run the login procedure before any extraction takes place."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect video ids and titles found in a playlist page.

        Tags carrying a ``data-video-id`` attribute are scanned first; the
        class-level ``_VIDEO_RE`` and two more permissive patterns are then
        handed to ``extract_videos_from_page_impl`` together with the lists
        gathered so far.

        Returns the result of zipping the id list with the title list.
        """
        ids_in_page = []
        titles_in_page = []

        tagged_item_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tagged_item_re, page):
            tag_attrs = extract_attributes(tag)
            found_id = tag_attrs['data-video-id']
            found_title = unescapeHTML(tag_attrs.get('data-title'))
            # Only non-empty titles are stripped; None / '' are kept as they are
            titles_in_page.append(found_title.strip() if found_title else found_title)
            ids_in_page.append(found_id)

        # Old _VIDEO_RE first, then the relaxed fallbacks, in this exact order
        fallback_patterns = (
            self._VIDEO_RE,
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix.

        The watch page of the most recently discovered video is downloaded
        over and over (starting from the last 11 characters of the playlist
        id) until a page yields no video id that has not been seen before.
        The title is taken from the last downloaded page.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        seen_ids = []
        seed_id = playlist_id[-11:]
        mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        page_num = 0
        while True:
            page_num += 1
            webpage = self._download_webpage(
                'https://www.youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
                playlist_id, 'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            fresh_ids = [
                candidate
                for candidate in orderedSet(re.findall(mix_entry_re, webpage))
                if candidate not in seen_ids]
            if not fresh_ids:
                break
            seen_ids += fresh_ids
            seed_id = seen_ids[-1]

        url_results = self._ids_to_results(seen_ids)

        # First class name that yields a truthy element wins; when none does,
        # the value obtained for the last class name is kept
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and turn it into a playlist result.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False
        only when no playlist title was found and the entries iterator is
        exhausted right away.

        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist, is private, or the parameters are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                pieces = ['This playlist %s' % reason]
                if 'private' in reason:
                    pieces.append(', use --username or --netrc to access it')
                pieces.append('.')
                raise ExtractorError(''.join(pieces), expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if uploader_link:
            uploader_id = uploader_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            try:
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist['uploader'] = uploader
        playlist['uploader_id'] = uploader_id
        playlist['uploader_url'] = uploader_url

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Detect a video id inside a playlist URL.

        Returns ``(video_id, result)``: ``result`` is a url_result for the
        video when a video id is present and the ``noplaylist`` param is set,
        otherwise None. ``video_id`` is None when the URL names no video.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Resolve *url* to a single video, a mix, or a regular playlist."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        # Mixes require a custom extraction process
        if playlist_id.startswith(('RD', 'UL', 'PU')):
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
2927
2928
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a YouTube channel given a /channel/<id> URL."""

    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the channel's videos page URL built from ``_TEMPLATE_URL``."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferring its uploads playlist when available.

        When a channel id starting with 'UC' can be read from the channel
        page, extraction is delegated to the playlist extractor using the
        matching 'UU' playlist id. Otherwise the channel's videos page is
        processed directly.

        Raises ExtractorError (expected) when the videos page has no entries
        and shows an alert message.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            uploads_playlist_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % uploads_playlist_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; an empty channel may carry an alert instead
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3028
3029
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for a user's videos given a /user/, /c/ or bare-name URL, or the "ytuser:" keyword."""

    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only when no other Youtube*IE class in this module does.

        The URL pattern of this extractor is too permissive and would match
        URLs meant for other extractors, so every other module-level name
        that starts with 'Youtube' and ends with 'IE' is consulted first.
        """
        for name, klass in globals().items():
            if not name.startswith('Youtube') or not name.endswith('IE'):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, using the 'user' or 'c' path segment from *url*.

        Falls back to 'user' when the URL carries neither segment.
        """
        mobj = re.match(self._VALID_URL, url)
        path_kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, mobj.group('id'))
3084
3085
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for .../live URLs of a user or channel."""

    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the video it shows, or to the base URL.

        The video is returned only when the page's og:type starts with
        'video' and its 'videoId' meta holds an 11-character id; in every
        other case (including a failed download) the URL without the
        trailing '/live' is returned for further extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id, base_url = mobj.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3136
3137
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user, channel or ``/c/`` URL.

    This class defines no methods of its own; it only supplies the URL
    pattern, naming and test cases on top of its base class.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'

    # The path segment following user/, channel/ or c/ is captured as the id.
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
        'playlist_mincount': 4,
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
        'playlist_mincount': 70,
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'playlist_mincount': 17,
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3170
3171
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Common base of the search extractors; defines their ``_VIDEO_RE``."""

    # Matches an href pointing at /watch?v=<11-character id> (group "id") and,
    # optionally, a title="..." attribute later in the same tag (group
    # "title").
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3174
3175
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor that pages through youtube.com/results."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra URL query parameters merged into the search request; subclasses
    # may override this to alter the search.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link found in each page, until ``n`` videos have been collected, a
        page yields no videos or no "Next" link is present.

        Returns a playlist result, titled/identified by ``query``, holding at
        most ``n`` entries.

        Raises ExtractorError (expected) when a page contains the
        "search-message" marker, i.e. there are no video results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are available.  ">=" (rather than
            # ">") avoids downloading one more page whose results would be
            # thrown away when exactly n videos have been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Compare before slicing: n is presumably allowed to be _MAX_RESULTS
        # (float('inf')), which is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
3224
3225
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a ``search_sort`` query argument."""

    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the results URL query by the inherited _get_n_results().
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
3231
3232
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for youtube.com/results?... search result URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # The value of the search_query= (or q=) parameter is captured as "query".
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'info_dict': {
            'title': 'youtube-dl test video',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the videos found on the given results page.

        The playlist title is the URL-decoded search query taken from ``url``;
        the decoded query is also what the page download is labelled with.
        """
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
3253
3254
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for youtube.com/show/<id> pages."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        },
        'playlist_mincount': 5,
    }]

    def _real_extract(self, url):
        """Delegate to the base class using the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3272
3273
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    _FEED_NAME is used both for the extractor name ("youtube:<feed>") and for
    the downloaded URL (https://www.youtube.com/feed/<feed>); _PLAYLIST_TITLE
    is used as the title of the resulting playlist and as the label of the
    downloads.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield results for the videos of a feed page and its continuations.

        ``page`` is the HTML of the first feed page.  Further portions are
        fetched through the "load more" link for as long as one is present
        and the latest portion contributed at least one unseen video id.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" check O(1) per id, which matters for
        # feeds that keep offering further portions.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for uniqueness and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                headers=self._YOUTUBE_CLIENT_HEADERS)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist.

        ``url`` itself is not used; the page fetched is determined by
        _FEED_NAME only.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3326
3327
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "WL" (watch later) list and its ``:ytwatchlater`` alias."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a single video result or the whole "WL" playlist.

        The second item returned by ``_check_download_just_video`` is used
        as-is when it is truthy; otherwise the second item returned by
        ``_extract_playlist('WL')`` is the result.
        """
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, whole_playlist = self._extract_playlist('WL')
            return whole_playlist
        return single_video
3347
3348
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor resolving the favourites page to its underlying playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Look up the favourites playlist id and hand it to YoutubePlaylist.

        ``url`` is not used: the ``my_favorites`` page is always downloaded
        and the first ``list=`` value found in it is taken as the playlist id.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
3359
3360
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``recommended`` feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3366
3367
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``subscriptions`` feed."""

    # The short form accepted by _VALID_URL carries a leading colon
    # (":ytsubs"), so the description has to advertise it that way.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3373
3374
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``history`` feed."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3380
3381
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs lacking a video id and reports an error.

    Extraction never succeeds: ``_real_extract`` always raises an expected
    ExtractorError hinting that the URL was probably not quoted in the shell.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
3429
3430
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose ``v=`` value is 1 to 10 characters long.

    Extraction never succeeds: ``_real_extract`` always raises an expected
    ExtractorError naming the incomplete id.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError for the truncated id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (
            truncated_id, url)
        raise ExtractorError(message, expected=True)