1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 mimetype2ext,
40 orderedSet,
41 parse_codecs,
42 parse_duration,
43 remove_quotes,
44 remove_start,
45 smuggle_url,
46 str_or_none,
47 str_to_int,
48 try_get,
49 unescapeHTML,
50 unified_strdate,
51 unsmuggle_url,
52 uppercase_escape,
53 url_or_none,
54 urlencode_postdata,
55 )
56
57
58 class YoutubeBaseInfoExtractor(InfoExtractor):
59 """Provide base functions for Youtube extractors"""
60 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
61 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
62
63 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
64 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
65 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
66
67 _NETRC_MACHINE = 'youtube'
68 # If True, an error is raised when no login info is provided
69 _LOGIN_REQUIRED = False
70
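# Covers the known playlist id prefixes, e.g. PL (regular playlists),
# UU (channel uploads), RD (mixes) and OLAK5uy_ (auto-generated albums)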
71 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
72
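# Identify the desktop web client to YouTube; some of the JSON endpoints
# used below (e.g. the "Load more" continuations) may reject requests
# without these headers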
73 _YOUTUBE_CLIENT_HEADERS = {
74 'x-youtube-client-name': '1',
75 'x-youtube-client-version': '1.20200609.04.02',
76 }
77
78 def _set_language(self):
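# Setting hl=en in the PREF cookie forces the English interface, so any
# text-based extraction below (labels, error messages) stays predictable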
79 self._set_cookie(
80 '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
81 # YouTube sets the expire time to about two months
82 expire_time=time.time() + 2 * 30 * 24 * 3600)
83
84 def _ids_to_results(self, ids):
85 return [
86 self.url_result(vid_id, 'Youtube', video_id=vid_id)
87 for vid_id in ids]
88
89 def _login(self):
90 """
91 Attempt to log in to YouTube.
92 True is returned if successful or skipped.
93 False is returned if login failed.
94
95 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
96 """
97 username, password = self._get_login_info()
98 # No authentication to be performed
99 if username is None:
100 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
101 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
102 return True
103
104 login_page = self._download_webpage(
105 self._LOGIN_URL, None,
106 note='Downloading login page',
107 errnote='unable to fetch login page', fatal=False)
108 if login_page is False:
109 return
110
111 login_form = self._hidden_inputs(login_page)
112
113 def req(url, f_req, note, errnote):
114 data = login_form.copy()
115 data.update({
116 'pstMsg': 1,
117 'checkConnection': 'youtube',
118 'checkedDomains': 'youtube',
119 'hl': 'en',
120 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
121 'f.req': json.dumps(f_req),
122 'flowName': 'GlifWebSignIn',
123 'flowEntry': 'ServiceLogin',
124 # TODO: reverse actual botguard identifier generation algo
125 'bgRequest': '["identifier",""]',
126 })
127 return self._download_json(
128 url, None, note=note, errnote=errnote,
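# Responses are prefixed with an XSSI guard (e.g. ")]}'"); strip
# everything before the first '[' so the body parses as JSON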
129 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
130 fatal=False,
131 data=urlencode_postdata(data), headers={
132 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
133 'Google-Accounts-XSRF': 1,
134 })
135
136 def warn(message):
137 self._downloader.report_warning(message)
138
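# The f.req payloads below mirror the browser's sign-in protocol: deeply
# nested JSON arrays whose individual fields are undocumented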
139 lookup_req = [
140 username,
141 None, [], None, 'US', None, None, 2, False, True,
142 [
143 None, None,
144 [2, 1, None, 1,
145 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
146 None, [], 4],
147 1, [None, None, []], None, None, None, True
148 ],
149 username,
150 ]
151
152 lookup_results = req(
153 self._LOOKUP_URL, lookup_req,
154 'Looking up account info', 'Unable to look up account info')
155
156 if lookup_results is False:
157 return False
158
159 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
160 if not user_hash:
161 warn('Unable to extract user hash')
162 return False
163
164 challenge_req = [
165 user_hash,
166 None, 1, None, [1, None, None, None, [password, None, True]],
167 [
168 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
169 1, [None, None, []], None, None, None, True
170 ]]
171
172 challenge_results = req(
173 self._CHALLENGE_URL, challenge_req,
174 'Logging in', 'Unable to log in')
175
176 if challenge_results is False:
177 return
178
179 login_res = try_get(challenge_results, lambda x: x[0][5], list)
180 if login_res:
181 login_msg = try_get(login_res, lambda x: x[5], compat_str)
182 warn(
183 'Unable to log in: %s' % ('Invalid password'
184 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
185 return False
186
187 res = try_get(challenge_results, lambda x: x[0][-1], list)
188 if not res:
189 warn('Unable to extract result entry')
190 return False
191
192 login_challenge = try_get(res, lambda x: x[0][0], list)
193 if login_challenge:
194 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
195 if challenge_str == 'TWO_STEP_VERIFICATION':
196 # SEND_SUCCESS - TFA code has been successfully sent to phone
197 # QUOTA_EXCEEDED - reached the limit of TFA codes
198 status = try_get(login_challenge, lambda x: x[5], compat_str)
199 if status == 'QUOTA_EXCEEDED':
200 warn('Exceeded the limit of TFA codes, try later')
201 return False
202
203 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
204 if not tl:
205 warn('Unable to extract TL')
206 return False
207
208 tfa_code = self._get_tfa_info('2-step verification code')
209
210 if not tfa_code:
211 warn(
212 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
213 ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
214 return False
215
216 tfa_code = remove_start(tfa_code, 'G-')
217
218 tfa_req = [
219 user_hash, None, 2, None,
220 [
221 9, None, None, None, None, None, None, None,
222 [None, tfa_code, True, 2]
223 ]]
224
225 tfa_results = req(
226 self._TFA_URL.format(tl), tfa_req,
227 'Submitting TFA code', 'Unable to submit TFA code')
228
229 if tfa_results is False:
230 return False
231
232 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
233 if tfa_res:
234 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
235 warn(
236 'Unable to finish TFA: %s' % ('Invalid TFA code'
237 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
238 return False
239
240 check_cookie_url = try_get(
241 tfa_results, lambda x: x[0][-1][2], compat_str)
242 else:
243 CHALLENGES = {
244 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
245 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
246 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
247 }
248 challenge = CHALLENGES.get(
249 challenge_str,
250 '%s returned error %s.' % (self.IE_NAME, challenge_str))
251 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
252 return False
253 else:
254 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
255
256 if not check_cookie_url:
257 warn('Unable to extract CheckCookie URL')
258 return False
259
260 check_cookie_results = self._download_webpage(
261 check_cookie_url, None, 'Checking cookie', fatal=False)
262
263 if check_cookie_results is False:
264 return False
265
266 if 'https://myaccount.google.com/' not in check_cookie_results:
267 warn('Unable to log in')
268 return False
269
270 return True
271
272 def _download_webpage_handle(self, *args, **kwargs):
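# Force the legacy (non-Polymer) page layout on every request; the
# HTML/regex based extraction in these extractors depends on it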
273 query = kwargs.get('query', {}).copy()
274 query['disable_polymer'] = 'true'
275 kwargs['query'] = query
276 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
277 *args, **compat_kwargs(kwargs))
278
279 def _real_initialize(self):
280 if self._downloader is None:
281 return
282 self._set_language()
283 if not self._login():
284 return
285
286
287 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
288 # Extract entries from page with "Load more" button
289 def _entries(self, page, playlist_id):
290 more_widget_html = content_html = page
291 for page_num in itertools.count(1):
292 for entry in self._process_page(content_html):
293 yield entry
294
295 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
296 if not mobj:
297 break
298
299 count = 0
300 retries = 3
301 while count <= retries:
302 try:
303 # Downloading the page may result in an intermittent 5xx HTTP error
304 # that is usually worked around with a retry
305 more = self._download_json(
306 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
307 'Downloading page #%s%s'
308 % (page_num, ' (retry #%d)' % count if count else ''),
309 transform_source=uppercase_escape,
310 headers=self._YOUTUBE_CLIENT_HEADERS)
311 break
312 except ExtractorError as e:
313 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
314 count += 1
315 if count <= retries:
316 continue
317 raise
318
319 content_html = more['content_html']
320 if not content_html.strip():
321 # Some webpages show a "Load more" button but they don't
322 # have more videos
323 break
324 more_widget_html = more['load_more_widget_html']
325
326
327 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
328 def _process_page(self, content):
329 for video_id, video_title in self.extract_videos_from_page(content):
330 yield self.url_result(video_id, 'Youtube', video_id, video_title)
331
332 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
333 for mobj in re.finditer(video_re, page):
334 # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
335 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
336 continue
337 video_id = mobj.group('id')
338 video_title = unescapeHTML(
339 mobj.group('title')) if 'title' in mobj.groupdict() else None
340 if video_title:
341 video_title = video_title.strip()
342 if video_title == '► Play all':
343 video_title = None
344 try:
345 idx = ids_in_page.index(video_id)
346 if video_title and not titles_in_page[idx]:
347 titles_in_page[idx] = video_title
348 except ValueError:
349 ids_in_page.append(video_id)
350 titles_in_page.append(video_title)
351
352 def extract_videos_from_page(self, page):
353 ids_in_page = []
354 titles_in_page = []
355 self.extract_videos_from_page_impl(
356 self._VIDEO_RE, page, ids_in_page, titles_in_page)
357 return zip(ids_in_page, titles_in_page)
358
359
360 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
361 def _process_page(self, content):
362 for playlist_id in orderedSet(re.findall(
363 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
364 content)):
365 yield self.url_result(
366 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
367
368 def _real_extract(self, url):
369 playlist_id = self._match_id(url)
370 webpage = self._download_webpage(url, playlist_id)
371 title = self._og_search_title(webpage, fatal=False)
372 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
373
374
375 class YoutubeIE(YoutubeBaseInfoExtractor):
376 IE_DESC = 'YouTube.com'
377 _VALID_URL = r"""(?x)^
378 (
379 (?:https?://|//) # http(s):// or protocol-independent URL
380 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
381 (?:www\.)?deturl\.com/www\.youtube\.com/|
382 (?:www\.)?pwnyoutube\.com/|
383 (?:www\.)?hooktube\.com/|
384 (?:www\.)?yourepeat\.com/|
385 tube\.majestyc\.net/|
386 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
387 (?:(?:www|dev)\.)?invidio\.us/|
388 (?:(?:www|no)\.)?invidiou\.sh/|
389 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
390 (?:www\.)?invidious\.kabi\.tk/|
391 (?:www\.)?invidious\.13ad\.de/|
392 (?:www\.)?invidious\.mastodon\.host/|
393 (?:www\.)?invidious\.nixnet\.xyz/|
394 (?:www\.)?invidious\.drycat\.fr/|
395 (?:www\.)?tube\.poal\.co/|
396 (?:www\.)?vid\.wxzm\.sx/|
397 (?:www\.)?yewtu\.be/|
398 (?:www\.)?yt\.elukerio\.org/|
399 (?:www\.)?yt\.lelux\.fi/|
400 (?:www\.)?invidious\.ggc-project\.de/|
401 (?:www\.)?yt\.maisputain\.ovh/|
402 (?:www\.)?invidious\.13ad\.de/|
403 (?:www\.)?invidious\.toot\.koeln/|
404 (?:www\.)?invidious\.fdn\.fr/|
405 (?:www\.)?watch\.nettohikari\.com/|
406 (?:www\.)?kgg2m7yk5aybusll\.onion/|
407 (?:www\.)?qklhadlycap4cnod\.onion/|
408 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
409 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
410 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
411 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
412 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
413 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
414 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
415 (?:.*?\#/)? # handle anchor (#/) redirect urls
416 (?: # the various things that can precede the ID:
417 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
418 |(?: # or the v= param in all its forms
419 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
420 (?:\?|\#!?) # the params delimiter ? or # or #!
421 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
422 v=
423 )
424 ))
425 |(?:
426 youtu\.be| # just youtu.be/xxxx
427 vid\.plus| # or vid.plus/xxxx
428 zwearz\.com/watch| # or zwearz.com/watch/xxxx
429 )/
430 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
431 )
432 )? # all until now is optional -> you can pass the naked ID
433 ([0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
434 (?!.*?\blist=
435 (?:
436 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
437 WL # WL are handled by the watch later IE
438 )
439 )
440 (?(1).+)? # if we found the ID, everything can follow
441 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
442 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
443 _PLAYER_INFO_RE = (
444 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
445 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
446 )
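# e.g. a player URL such as
# https://www.youtube.com/s/player/e7567ecf/player_ias.vflset/en_US/base.js
# is matched by the first pattern and yields ('js', 'e7567ecf') via
# _extract_player_info() below (the id here is only illustrative)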
447 _formats = {
448 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
449 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
450 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
451 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
452 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
453 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
454 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
455 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
457 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
458 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
459 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
460 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
461 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
462 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
463 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
464 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
465 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
466
467
468 # 3D videos
469 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
470 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
471 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
472 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
473 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
474 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
475 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
476
477 # Apple HTTP Live Streaming
478 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
480 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
481 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
482 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
483 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
484 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
485 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
486
487 # DASH mp4 video
488 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
494 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
497 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
498 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
499 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
500
501 # Dash mp4 audio
502 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
503 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
504 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
505 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
506 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
507 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
508 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
509
510 # Dash webm
511 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
512 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
517 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
518 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
527 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
528 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
530 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
531 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
533
534 # Dash webm audio
535 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
536 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
537
538 # Dash webm audio with opus inside
539 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
540 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
541 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
542
543 # RTMP (unnamed)
544 '_rtmp': {'protocol': 'rtmp'},
545
546 # av01 video only formats sometimes served with "unknown" codecs
547 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
548 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
549 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
550 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551 }
552 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
553
554 _GEO_BYPASS = False
555
556 IE_NAME = 'youtube'
557 _TESTS = [
558 {
559 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
560 'info_dict': {
561 'id': 'BaW_jenozKc',
562 'ext': 'mp4',
563 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
564 'uploader': 'Philipp Hagemeister',
565 'uploader_id': 'phihag',
566 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
567 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
568 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
569 'upload_date': '20121002',
570 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
571 'categories': ['Science & Technology'],
572 'tags': ['youtube-dl'],
573 'duration': 10,
574 'view_count': int,
575 'like_count': int,
576 'dislike_count': int,
577 'start_time': 1,
578 'end_time': 9,
579 }
580 },
581 {
582 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
583 'note': 'Test generic use_cipher_signature video (#897)',
584 'info_dict': {
585 'id': 'UxxajLWwzqY',
586 'ext': 'mp4',
587 'upload_date': '20120506',
588 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
589 'alt_title': 'I Love It (feat. Charli XCX)',
590 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
591 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
592 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
593 'iconic ep', 'iconic', 'love', 'it'],
594 'duration': 180,
595 'uploader': 'Icona Pop',
596 'uploader_id': 'IconaPop',
597 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
598 'creator': 'Icona Pop',
599 'track': 'I Love It (feat. Charli XCX)',
600 'artist': 'Icona Pop',
601 }
602 },
603 {
604 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
605 'note': 'Test VEVO video with age protection (#956)',
606 'info_dict': {
607 'id': '07FYdnEawAQ',
608 'ext': 'mp4',
609 'upload_date': '20130703',
610 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
611 'alt_title': 'Tunnel Vision',
612 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
613 'duration': 419,
614 'uploader': 'justintimberlakeVEVO',
615 'uploader_id': 'justintimberlakeVEVO',
616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
617 'creator': 'Justin Timberlake',
618 'track': 'Tunnel Vision',
619 'artist': 'Justin Timberlake',
620 'age_limit': 18,
621 }
622 },
623 {
624 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
625 'note': 'Embed-only video (#1746)',
626 'info_dict': {
627 'id': 'yZIXLfi8CZQ',
628 'ext': 'mp4',
629 'upload_date': '20120608',
630 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
631 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
632 'uploader': 'SET India',
633 'uploader_id': 'setindia',
634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
635 'age_limit': 18,
636 }
637 },
638 {
639 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
640 'note': 'Use the first video ID in the URL',
641 'info_dict': {
642 'id': 'BaW_jenozKc',
643 'ext': 'mp4',
644 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
645 'uploader': 'Philipp Hagemeister',
646 'uploader_id': 'phihag',
647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
648 'upload_date': '20121002',
649 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
650 'categories': ['Science & Technology'],
651 'tags': ['youtube-dl'],
652 'duration': 10,
653 'view_count': int,
654 'like_count': int,
655 'dislike_count': int,
656 },
657 'params': {
658 'skip_download': True,
659 },
660 },
661 {
662 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
663 'note': '256k DASH audio (format 141) via DASH manifest',
664 'info_dict': {
665 'id': 'a9LDPn-MO4I',
666 'ext': 'm4a',
667 'upload_date': '20121002',
668 'uploader_id': '8KVIDEO',
669 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
670 'description': '',
671 'uploader': '8KVIDEO',
672 'title': 'UHDTV TEST 8K VIDEO.mp4'
673 },
674 'params': {
675 'youtube_include_dash_manifest': True,
676 'format': '141',
677 },
678 'skip': 'format 141 not served anymore',
679 },
680 # DASH manifest with encrypted signature
681 {
682 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
683 'info_dict': {
684 'id': 'IB3lcPjvWLA',
685 'ext': 'm4a',
686 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
687 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
688 'duration': 244,
689 'uploader': 'AfrojackVEVO',
690 'uploader_id': 'AfrojackVEVO',
691 'upload_date': '20131011',
692 },
693 'params': {
694 'youtube_include_dash_manifest': True,
695 'format': '141/bestaudio[ext=m4a]',
696 },
697 },
698 # JS player signature function name containing $
699 {
700 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
701 'info_dict': {
702 'id': 'nfWlot6h_JM',
703 'ext': 'm4a',
704 'title': 'Taylor Swift - Shake It Off',
705 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
706 'duration': 242,
707 'uploader': 'TaylorSwiftVEVO',
708 'uploader_id': 'TaylorSwiftVEVO',
709 'upload_date': '20140818',
710 },
711 'params': {
712 'youtube_include_dash_manifest': True,
713 'format': '141/bestaudio[ext=m4a]',
714 },
715 },
716 # Controversy video
717 {
718 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
719 'info_dict': {
720 'id': 'T4XJQO3qol8',
721 'ext': 'mp4',
722 'duration': 219,
723 'upload_date': '20100909',
724 'uploader': 'Amazing Atheist',
725 'uploader_id': 'TheAmazingAtheist',
726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
727 'title': 'Burning Everyone\'s Koran',
728 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
729 }
730 },
731 # Normal age-gate video (No vevo, embed allowed)
732 {
733 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
734 'info_dict': {
735 'id': 'HtVdAasjOgU',
736 'ext': 'mp4',
737 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
738 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
739 'duration': 142,
740 'uploader': 'The Witcher',
741 'uploader_id': 'WitcherGame',
742 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
743 'upload_date': '20140605',
744 'age_limit': 18,
745 },
746 },
747 # Age-gate video with encrypted signature
748 {
749 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
750 'info_dict': {
751 'id': '6kLq3WMV1nU',
752 'ext': 'mp4',
753 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
754 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
755 'duration': 246,
756 'uploader': 'LloydVEVO',
757 'uploader_id': 'LloydVEVO',
758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
759 'upload_date': '20110629',
760 'age_limit': 18,
761 },
762 },
763 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
764 # YouTube Red ad is not captured for creator
765 {
766 'url': '__2ABJjxzNo',
767 'info_dict': {
768 'id': '__2ABJjxzNo',
769 'ext': 'mp4',
770 'duration': 266,
771 'upload_date': '20100430',
772 'uploader_id': 'deadmau5',
773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
774 'creator': 'Dada Life, deadmau5',
775 'description': 'md5:12c56784b8032162bb936a5f76d55360',
776 'uploader': 'deadmau5',
777 'title': 'Deadmau5 - Some Chords (HD)',
778 'alt_title': 'This Machine Kills Some Chords',
779 },
780 'expected_warnings': [
781 'DASH manifest missing',
782 ]
783 },
784 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
785 {
786 'url': 'lqQg6PlCWgI',
787 'info_dict': {
788 'id': 'lqQg6PlCWgI',
789 'ext': 'mp4',
790 'duration': 6085,
791 'upload_date': '20150827',
792 'uploader_id': 'olympic',
793 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
794 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
795 'uploader': 'Olympic',
796 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
797 },
798 'params': {
799 'skip_download': 'requires avconv',
800 }
801 },
802 # Non-square pixels
803 {
804 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
805 'info_dict': {
806 'id': '_b-2C3KPAM0',
807 'ext': 'mp4',
808 'stretched_ratio': 16 / 9.,
809 'duration': 85,
810 'upload_date': '20110310',
811 'uploader_id': 'AllenMeow',
812 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
813 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
814 'uploader': '孫ᄋᄅ',
815 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
816 },
817 },
818 # url_encoded_fmt_stream_map is empty string
819 {
820 'url': 'qEJwOuvDf7I',
821 'info_dict': {
822 'id': 'qEJwOuvDf7I',
823 'ext': 'webm',
824 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
825 'description': '',
826 'upload_date': '20150404',
827 'uploader_id': 'spbelect',
828 'uploader': 'Наблюдатели Петербурга',
829 },
830 'params': {
831 'skip_download': 'requires avconv',
832 },
833 'skip': 'This live event has ended.',
834 },
835 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
836 {
837 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
838 'info_dict': {
839 'id': 'FIl7x6_3R5Y',
840 'ext': 'webm',
841 'title': 'md5:7b81415841e02ecd4313668cde88737a',
842 'description': 'md5:116377fd2963b81ec4ce64b542173306',
843 'duration': 220,
844 'upload_date': '20150625',
845 'uploader_id': 'dorappi2000',
846 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
847 'uploader': 'dorappi2000',
848 'formats': 'mincount:31',
849 },
850 'skip': 'not actual anymore',
851 },
852 # DASH manifest with segment_list
853 {
854 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
855 'md5': '8ce563a1d667b599d21064e982ab9e31',
856 'info_dict': {
857 'id': 'CsmdDsKjzN8',
858 'ext': 'mp4',
859 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
860 'uploader': 'Airtek',
861 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
862 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
863 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
864 },
865 'params': {
866 'youtube_include_dash_manifest': True,
867 'format': '135', # bestvideo
868 },
869 'skip': 'This live event has ended.',
870 },
871 {
872 # Multifeed videos (multiple cameras), URL is for Main Camera
873 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
874 'info_dict': {
875 'id': 'jqWvoWXjCVs',
876 'title': 'teamPGP: Rocket League Noob Stream',
877 'description': 'md5:dc7872fb300e143831327f1bae3af010',
878 },
879 'playlist': [{
880 'info_dict': {
881 'id': 'jqWvoWXjCVs',
882 'ext': 'mp4',
883 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
884 'description': 'md5:dc7872fb300e143831327f1bae3af010',
885 'duration': 7335,
886 'upload_date': '20150721',
887 'uploader': 'Beer Games Beer',
888 'uploader_id': 'beergamesbeer',
889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
890 'license': 'Standard YouTube License',
891 },
892 }, {
893 'info_dict': {
894 'id': '6h8e8xoXJzg',
895 'ext': 'mp4',
896 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
897 'description': 'md5:dc7872fb300e143831327f1bae3af010',
898 'duration': 7337,
899 'upload_date': '20150721',
900 'uploader': 'Beer Games Beer',
901 'uploader_id': 'beergamesbeer',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
903 'license': 'Standard YouTube License',
904 },
905 }, {
906 'info_dict': {
907 'id': 'PUOgX5z9xZw',
908 'ext': 'mp4',
909 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
910 'description': 'md5:dc7872fb300e143831327f1bae3af010',
911 'duration': 7337,
912 'upload_date': '20150721',
913 'uploader': 'Beer Games Beer',
914 'uploader_id': 'beergamesbeer',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
916 'license': 'Standard YouTube License',
917 },
918 }, {
919 'info_dict': {
920 'id': 'teuwxikvS5k',
921 'ext': 'mp4',
922 'title': 'teamPGP: Rocket League Noob Stream (zim)',
923 'description': 'md5:dc7872fb300e143831327f1bae3af010',
924 'duration': 7334,
925 'upload_date': '20150721',
926 'uploader': 'Beer Games Beer',
927 'uploader_id': 'beergamesbeer',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
929 'license': 'Standard YouTube License',
930 },
931 }],
932 'params': {
933 'skip_download': True,
934 },
935 'skip': 'This video is not available.',
936 },
937 {
938 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
939 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
940 'info_dict': {
941 'id': 'gVfLd0zydlo',
942 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
943 },
944 'playlist_count': 2,
945 'skip': 'Not multifeed anymore',
946 },
947 {
948 'url': 'https://vid.plus/FlRa-iH7PGw',
949 'only_matching': True,
950 },
951 {
952 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
953 'only_matching': True,
954 },
955 {
956 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
957 # Also tests cut-off URL expansion in video description (see
958 # https://github.com/ytdl-org/youtube-dl/issues/1892,
959 # https://github.com/ytdl-org/youtube-dl/issues/8164)
960 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
961 'info_dict': {
962 'id': 'lsguqyKfVQg',
963 'ext': 'mp4',
964 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
965 'alt_title': 'Dark Walk - Position Music',
966 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
967 'duration': 133,
968 'upload_date': '20151119',
969 'uploader_id': 'IronSoulElf',
970 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
971 'uploader': 'IronSoulElf',
972 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
973 'track': 'Dark Walk - Position Music',
974 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
975 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976 },
977 'params': {
978 'skip_download': True,
979 },
980 },
981 {
982 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
983 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
984 'only_matching': True,
985 },
986 {
987 # Video with yt:stretch=17:0
988 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
989 'info_dict': {
990 'id': 'Q39EVAstoRM',
991 'ext': 'mp4',
992 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
993 'description': 'md5:ee18a25c350637c8faff806845bddee9',
994 'upload_date': '20151107',
995 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
996 'uploader': 'CH GAMER DROID',
997 },
998 'params': {
999 'skip_download': True,
1000 },
1001 'skip': 'This video does not exist.',
1002 },
1003 {
1004 # Video licensed under Creative Commons
1005 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1006 'info_dict': {
1007 'id': 'M4gD1WSo5mA',
1008 'ext': 'mp4',
1009 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1010 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1011 'duration': 721,
1012 'upload_date': '20150127',
1013 'uploader_id': 'BerkmanCenter',
1014 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1015 'uploader': 'The Berkman Klein Center for Internet & Society',
1016 'license': 'Creative Commons Attribution license (reuse allowed)',
1017 },
1018 'params': {
1019 'skip_download': True,
1020 },
1021 },
1022 {
1023 # Channel-like uploader_url
1024 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1025 'info_dict': {
1026 'id': 'eQcmzGIKrzg',
1027 'ext': 'mp4',
1028 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1029 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1030 'duration': 4060,
1031 'upload_date': '20151119',
1032 'uploader': 'Bernie Sanders',
1033 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1034 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1035 'license': 'Creative Commons Attribution license (reuse allowed)',
1036 },
1037 'params': {
1038 'skip_download': True,
1039 },
1040 },
1041 {
1042 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1043 'only_matching': True,
1044 },
1045 {
1046 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1047 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1048 'only_matching': True,
1049 },
1050 {
1051 # Rental video preview
1052 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1053 'info_dict': {
1054 'id': 'uGpuVWrhIzE',
1055 'ext': 'mp4',
1056 'title': 'Piku - Trailer',
1057 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1058 'upload_date': '20150811',
1059 'uploader': 'FlixMatrix',
1060 'uploader_id': 'FlixMatrixKaravan',
1061 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1062 'license': 'Standard YouTube License',
1063 },
1064 'params': {
1065 'skip_download': True,
1066 },
1067 'skip': 'This video is not available.',
1068 },
1069 {
1070 # YouTube Red video with episode data
1071 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1072 'info_dict': {
1073 'id': 'iqKdEhx-dD4',
1074 'ext': 'mp4',
1075 'title': 'Isolation - Mind Field (Ep 1)',
1076 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1077 'duration': 2085,
1078 'upload_date': '20170118',
1079 'uploader': 'Vsauce',
1080 'uploader_id': 'Vsauce',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1082 'series': 'Mind Field',
1083 'season_number': 1,
1084 'episode_number': 1,
1085 },
1086 'params': {
1087 'skip_download': True,
1088 },
1089 'expected_warnings': [
1090 'Skipping DASH manifest',
1091 ],
1092 },
1093 {
1094 # The following content has been identified by the YouTube community
1095 # as inappropriate or offensive to some audiences.
1096 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1097 'info_dict': {
1098 'id': '6SJNVb0GnPI',
1099 'ext': 'mp4',
1100 'title': 'Race Differences in Intelligence',
1101 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1102 'duration': 965,
1103 'upload_date': '20140124',
1104 'uploader': 'New Century Foundation',
1105 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1106 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1107 },
1108 'params': {
1109 'skip_download': True,
1110 },
1111 },
1112 {
1113 # itag 212
1114 'url': '1t24XAntNCY',
1115 'only_matching': True,
1116 },
1117 {
1118 # geo restricted to JP
1119 'url': 'sJL6WA-aGkQ',
1120 'only_matching': True,
1121 },
1122 {
1123 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1124 'only_matching': True,
1125 },
1126 {
1127 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1128 'only_matching': True,
1129 },
1130 {
1131 # DRM protected
1132 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1133 'only_matching': True,
1134 },
1135 {
1136 # Video with unsupported adaptive stream type formats
1137 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1138 'info_dict': {
1139 'id': 'Z4Vy8R84T1U',
1140 'ext': 'mp4',
1141 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1142 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1143 'duration': 433,
1144 'upload_date': '20130923',
1145 'uploader': 'Amelia Putri Harwita',
1146 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1147 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1148 'formats': 'maxcount:10',
1149 },
1150 'params': {
1151 'skip_download': True,
1152 'youtube_include_dash_manifest': False,
1153 },
1154 'skip': 'not actual anymore',
1155 },
1156 {
1157 # Youtube Music Auto-generated description
1158 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1159 'info_dict': {
1160 'id': 'MgNrAu2pzNs',
1161 'ext': 'mp4',
1162 'title': 'Voyeur Girl',
1163 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1164 'upload_date': '20190312',
1165 'uploader': 'Stephen - Topic',
1166 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1167 'artist': 'Stephen',
1168 'track': 'Voyeur Girl',
1169 'album': 'it\'s too much love to know my dear',
1170 'release_date': '20190313',
1171 'release_year': 2019,
1172 },
1173 'params': {
1174 'skip_download': True,
1175 },
1176 },
1177 {
1178 # Youtube Music Auto-generated description
1179 # Retrieve 'artist' field from 'Artist:' in video description
1180 # when it is present on youtube music video
1181 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1182 'info_dict': {
1183 'id': 'k0jLE7tTwjY',
1184 'ext': 'mp4',
1185 'title': 'Latch Feat. Sam Smith',
1186 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1187 'upload_date': '20150110',
1188 'uploader': 'Various Artists - Topic',
1189 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1190 'artist': 'Disclosure',
1191 'track': 'Latch Feat. Sam Smith',
1192 'album': 'Latch Featuring Sam Smith',
1193 'release_date': '20121008',
1194 'release_year': 2012,
1195 },
1196 'params': {
1197 'skip_download': True,
1198 },
1199 },
1200 {
1201 # Youtube Music Auto-generated description
1202 # handle multiple artists on youtube music video
1203 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1204 'info_dict': {
1205 'id': '74qn0eJSjpA',
1206 'ext': 'mp4',
1207 'title': 'Eastside',
1208 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1209 'upload_date': '20180710',
1210 'uploader': 'Benny Blanco - Topic',
1211 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1212 'artist': 'benny blanco, Halsey, Khalid',
1213 'track': 'Eastside',
1214 'album': 'Eastside',
1215 'release_date': '20180713',
1216 'release_year': 2018,
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
1221 },
1222 {
1223 # Youtube Music Auto-generated description
1224 # handle youtube music video with release_year and no release_date
1225 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1226 'info_dict': {
1227 'id': '-hcAI0g-f5M',
1228 'ext': 'mp4',
1229 'title': 'Put It On Me',
1230 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1231 'upload_date': '20180426',
1232 'uploader': 'Matt Maeson - Topic',
1233 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1234 'artist': 'Matt Maeson',
1235 'track': 'Put It On Me',
1236 'album': 'The Hearse',
1237 'release_date': None,
1238 'release_year': 2018,
1239 },
1240 'params': {
1241 'skip_download': True,
1242 },
1243 },
1244 {
1245 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1246 'only_matching': True,
1247 },
1248 {
1249 # invalid -> valid video id redirection
1250 'url': 'DJztXj2GPfl',
1251 'info_dict': {
1252 'id': 'DJztXj2GPfk',
1253 'ext': 'mp4',
1254 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1255 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1256 'upload_date': '20090125',
1257 'uploader': 'Prochorowka',
1258 'uploader_id': 'Prochorowka',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1260 'artist': 'Panjabi MC',
1261 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1262 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1263 },
1264 'params': {
1265 'skip_download': True,
1266 },
1267 },
1268 {
1269 # empty description results in an empty string
1270 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1271 'info_dict': {
1272 'id': 'x41yOUIvK2k',
1273 'ext': 'mp4',
1274 'title': 'IMG 3456',
1275 'description': '',
1276 'upload_date': '20170613',
1277 'uploader_id': 'ElevageOrVert',
1278 'uploader': 'ElevageOrVert',
1279 },
1280 'params': {
1281 'skip_download': True,
1282 },
1283 },
1284 ]
1285
1286 def __init__(self, *args, **kwargs):
1287 super(YoutubeIE, self).__init__(*args, **kwargs)
1288 self._player_cache = {}
1289
1290 def report_video_info_webpage_download(self, video_id):
1291 """Report attempt to download video info webpage."""
1292 self.to_screen('%s: Downloading video info webpage' % video_id)
1293
1294 def report_information_extraction(self, video_id):
1295 """Report attempt to extract video information."""
1296 self.to_screen('%s: Extracting video information' % video_id)
1297
1298 def report_unavailable_format(self, video_id, format):
1299 """Report extracted video URL."""
1300 self.to_screen('%s: Format %s not available' % (video_id, format))
1301
1302 def report_rtmp_download(self):
1303 """Indicate the download will use the RTMP protocol."""
1304 self.to_screen('RTMP download detected')
1305
1306 def _signature_cache_id(self, example_sig):
1307 """ Return a string representation of a signature """
1308 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1309
1310 @classmethod
1311 def _extract_player_info(cls, player_url):
1312 for player_re in cls._PLAYER_INFO_RE:
1313 id_m = re.search(player_re, player_url)
1314 if id_m:
1315 break
1316 else:
1317 raise ExtractorError('Cannot identify player %r' % player_url)
1318 return id_m.group('ext'), id_m.group('id')
1319
1320 def _extract_signature_function(self, video_id, player_url, example_sig):
1321 player_type, player_id = self._extract_player_info(player_url)
1322
1323 # Read from filesystem cache
1324 func_id = '%s_%s_%s' % (
1325 player_type, player_id, self._signature_cache_id(example_sig))
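# func_id (e.g. 'js_e7567ecf_5.3.8') doubles as the on-disk cache key,
# hence the basename assertion below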
1326 assert os.path.basename(func_id) == func_id
1327
1328 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1329 if cache_spec is not None:
1330 return lambda s: ''.join(s[i] for i in cache_spec)
1331
1332 download_note = (
1333 'Downloading player %s' % player_url
1334 if self._downloader.params.get('verbose') else
1335 'Downloading %s player %s' % (player_type, player_id)
1336 )
1337 if player_type == 'js':
1338 code = self._download_webpage(
1339 player_url, video_id,
1340 note=download_note,
1341 errnote='Download of %s failed' % player_url)
1342 res = self._parse_sig_js(code)
1343 elif player_type == 'swf':
1344 urlh = self._request_webpage(
1345 player_url, video_id,
1346 note=download_note,
1347 errnote='Download of %s failed' % player_url)
1348 code = urlh.read()
1349 res = self._parse_sig_swf(code)
1350 else:
1351 assert False, 'Invalid player type %r' % player_type
1352
1353 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1354 cache_res = res(test_string)
1355 cache_spec = [ord(c) for c in cache_res]
1356
1357 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1358 return res
1359
1360 def _print_sig_code(self, func, example_sig):
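# Prints a ready-to-paste Python snippet that reproduces the deciphering
# permutation as slices and single-character picks; only used when the
# 'youtube_print_sig_code' option is set (see _decrypt_signature)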
1361 def gen_sig_code(idxs):
1362 def _genslice(start, end, step):
1363 starts = '' if start == 0 else str(start)
1364 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1365 steps = '' if step == 1 else (':%d' % step)
1366 return 's[%s%s%s]' % (starts, ends, steps)
1367
1368 step = None
1369 # Quell pyflakes warnings - start will be set when step is set
1370 start = '(Never used)'
1371 for i, prev in zip(idxs[1:], idxs[:-1]):
1372 if step is not None:
1373 if i - prev == step:
1374 continue
1375 yield _genslice(start, prev, step)
1376 step = None
1377 continue
1378 if i - prev in [-1, 1]:
1379 step = i - prev
1380 start = prev
1381 continue
1382 else:
1383 yield 's[%d]' % prev
1384 if step is None:
1385 yield 's[%d]' % i
1386 else:
1387 yield _genslice(start, i, step)
1388
1389 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1390 cache_res = func(test_string)
1391 cache_spec = [ord(c) for c in cache_res]
1392 expr_code = ' + '.join(gen_sig_code(cache_spec))
1393 signature_id_tuple = '(%s)' % (
1394 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1395 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1396 ' return %s\n') % (signature_id_tuple, expr_code)
1397 self.to_screen('Extracted signature function:\n' + code)
1398
1399 def _parse_sig_js(self, jscode):
1400 funcname = self._search_regex(
1401 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1402 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1403 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1404 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1405 # Obsolete patterns
1406 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1407 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1408 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1409 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1410 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1411 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1412 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1413 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1414 jscode, 'Initial JS player signature function name', group='sig')
1415
1416 jsi = JSInterpreter(jscode)
1417 initial_function = jsi.extract_function(funcname)
1418 return lambda s: initial_function([s])
1419
1420 def _parse_sig_swf(self, file_contents):
1421 swfi = SWFInterpreter(file_contents)
1422 TARGET_CLASSNAME = 'SignatureDecipher'
1423 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1424 initial_function = swfi.extract_function(searched_class, 'decipher')
1425 return lambda s: initial_function([s])
1426
1427 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1428 """Turn the encrypted s field into a working signature"""
1429
1430 if player_url is None:
1431 raise ExtractorError('Cannot decrypt signature without player_url')
1432
1433 if player_url.startswith('//'):
1434 player_url = 'https:' + player_url
1435 elif not re.match(r'https?://', player_url):
1436 player_url = compat_urlparse.urljoin(
1437 'https://www.youtube.com', player_url)
1438 try:
1439 player_id = (player_url, self._signature_cache_id(s))
1440 if player_id not in self._player_cache:
1441 func = self._extract_signature_function(
1442 video_id, player_url, s
1443 )
1444 self._player_cache[player_id] = func
1445 func = self._player_cache[player_id]
1446 if self._downloader.params.get('youtube_print_sig_code'):
1447 self._print_sig_code(func, s)
1448 return func(s)
1449 except Exception as e:
1450 tb = traceback.format_exc()
1451 raise ExtractorError(
1452 'Signature extraction failed: ' + tb, cause=e)
1453
1454 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1455 try:
1456 subs_doc = self._download_xml(
1457 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1458 video_id, note=False)
1459 except ExtractorError as err:
1460 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1461 return {}
1462
1463 sub_lang_list = {}
1464 for track in subs_doc.findall('track'):
1465 lang = track.attrib['lang_code']
1466 if lang in sub_lang_list:
1467 continue
1468 sub_formats = []
1469 for ext in self._SUBTITLE_FORMATS:
1470 params = compat_urllib_parse_urlencode({
1471 'lang': lang,
1472 'v': video_id,
1473 'fmt': ext,
1474 'name': track.attrib['name'].encode('utf-8'),
1475 })
1476 sub_formats.append({
1477 'url': 'https://www.youtube.com/api/timedtext?' + params,
1478 'ext': ext,
1479 })
1480 sub_lang_list[lang] = sub_formats
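# Expose a live chat replay as a pseudo subtitle track; it is handled by
# a dedicated downloader keyed on the 'youtube_live_chat_replay' protocol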
1481 if has_live_chat_replay:
1482 sub_lang_list['live_chat'] = [
1483 {
1484 'video_id': video_id,
1485 'ext': 'json',
1486 'protocol': 'youtube_live_chat_replay',
1487 },
1488 ]
1489 if not sub_lang_list:
1490 self._downloader.report_warning('video doesn\'t have subtitles')
1491 return {}
1492 return sub_lang_list
1493
1494 def _get_ytplayer_config(self, video_id, webpage):
1495 patterns = (
1496 # User data may contain arbitrary character sequences that break
1497 # regex-based JSON extraction, e.g. when '};' appears inside it the
1498 # second, laxer regex won't capture the whole JSON. Work around this by
1499 # trying the more specific regex first; proper quoted-string handling,
1500 # to be implemented in the future, will replace this workaround (see
1501 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1502 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1503 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1504 r';ytplayer\.config\s*=\s*({.+?});',
1505 )
1506 config = self._search_regex(
1507 patterns, webpage, 'ytplayer.config', default=None)
1508 if config:
1509 return self._parse_json(
1510 uppercase_escape(config), video_id, fatal=False)
1511
1512 def _get_yt_initial_data(self, video_id, webpage):
1513 config = self._search_regex(
1514 (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
1515 r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
1516 webpage, 'ytInitialData', default=None)
1517 if config:
1518 return self._parse_json(
1519 uppercase_escape(config), video_id, fatal=False)
1520
1521 def _get_automatic_captions(self, video_id, webpage):
1522 """We need the webpage for getting the captions url, pass it as an
1523 argument to speed up the process."""
1524 self.to_screen('%s: Looking for automatic captions' % video_id)
1525 player_config = self._get_ytplayer_config(video_id, webpage)
1526 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1527 if not player_config:
1528 self._downloader.report_warning(err_msg)
1529 return {}
1530 try:
1531 args = player_config['args']
1532 caption_url = args.get('ttsurl')
1533 if caption_url:
1534 timestamp = args['timestamp']
1535 # We get the available subtitles
1536 list_params = compat_urllib_parse_urlencode({
1537 'type': 'list',
1538 'tlangs': 1,
1539 'asrs': 1,
1540 })
1541 list_url = caption_url + '&' + list_params
1542 caption_list = self._download_xml(list_url, video_id)
1543 original_lang_node = caption_list.find('track')
1544 if original_lang_node is None:
1545 self._downloader.report_warning('Video doesn\'t have automatic captions')
1546 return {}
1547 original_lang = original_lang_node.attrib['lang_code']
1548 caption_kind = original_lang_node.attrib.get('kind', '')
1549
1550 sub_lang_list = {}
1551 for lang_node in caption_list.findall('target'):
1552 sub_lang = lang_node.attrib['lang_code']
1553 sub_formats = []
1554 for ext in self._SUBTITLE_FORMATS:
1555 params = compat_urllib_parse_urlencode({
1556 'lang': original_lang,
1557 'tlang': sub_lang,
1558 'fmt': ext,
1559 'ts': timestamp,
1560 'kind': caption_kind,
1561 })
1562 sub_formats.append({
1563 'url': caption_url + '&' + params,
1564 'ext': ext,
1565 })
1566 sub_lang_list[sub_lang] = sub_formats
1567 return sub_lang_list
1568
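# make_captions reuses the query string of the base caption URL and, for each
# target language, rewrites 'tlang' and 'fmt' to build a list of subtitle
# formats per language.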
1569 def make_captions(sub_url, sub_langs):
1570 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1571 caption_qs = compat_parse_qs(parsed_sub_url.query)
1572 captions = {}
1573 for sub_lang in sub_langs:
1574 sub_formats = []
1575 for ext in self._SUBTITLE_FORMATS:
1576 caption_qs.update({
1577 'tlang': [sub_lang],
1578 'fmt': [ext],
1579 })
1580 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1581 query=compat_urllib_parse_urlencode(caption_qs, True)))
1582 sub_formats.append({
1583 'url': sub_url,
1584 'ext': ext,
1585 })
1586 captions[sub_lang] = sub_formats
1587 return captions
1588
1589 # New captions format as of 22.06.2017
1590 player_response = args.get('player_response')
1591 if player_response and isinstance(player_response, compat_str):
1592 player_response = self._parse_json(
1593 player_response, video_id, fatal=False)
1594 if player_response:
1595 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1596 caption_tracks = renderer['captionTracks']
1597 for caption_track in caption_tracks:
1598 if 'kind' not in caption_track:
1599 # not an automatic transcription
1600 continue
1601 base_url = caption_track['baseUrl']
1602 sub_lang_list = []
1603 for lang in renderer['translationLanguages']:
1604 lang_code = lang.get('languageCode')
1605 if lang_code:
1606 sub_lang_list.append(lang_code)
1607 return make_captions(base_url, sub_lang_list)
1608
1609 self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
1610 return {}
1611 # Some videos don't provide ttsurl but rather caption_tracks and
1612 # caption_translation_languages (e.g. 20LmZk1hakA)
1613 # Not used anymore as of 22.06.2017
1614 caption_tracks = args['caption_tracks']
1615 caption_translation_languages = args['caption_translation_languages']
1616 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1617 sub_lang_list = []
1618 for lang in caption_translation_languages.split(','):
1619 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1620 sub_lang = lang_qs.get('lc', [None])[0]
1621 if sub_lang:
1622 sub_lang_list.append(sub_lang)
1623 return make_captions(caption_url, sub_lang_list)
1624 # An extractor error can be raised by the download process if there are
1625 # no automatic captions but there are subtitles
1626 except (KeyError, IndexError, ExtractorError):
1627 self._downloader.report_warning(err_msg)
1628 return {}
1629
1630 def _mark_watched(self, video_id, video_info, player_response):
1631 playback_url = url_or_none(try_get(
1632 player_response,
1633 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1634 video_info, lambda x: x['videostats_playback_base_url'][0]))
1635 if not playback_url:
1636 return
1637 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1638 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1639
1640 # The cpn generation algorithm is reverse engineered from base.js.
1641 # In fact it works even with a dummy cpn.
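# The cpn is 16 characters drawn from the 64-character URL-safe alphabet below;
# masking randint(0, 256) with & 63 keeps the index in range, at the cost of a
# slightly non-uniform distribution.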
1642 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1643 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1644
1645 qs.update({
1646 'ver': ['2'],
1647 'cpn': [cpn],
1648 })
1649 playback_url = compat_urlparse.urlunparse(
1650 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1651
1652 self._download_webpage(
1653 playback_url, video_id, 'Marking watched',
1654 'Unable to mark watched', fatal=False)
1655
1656 @staticmethod
1657 def _extract_urls(webpage):
1658 # Embedded YouTube player
1659 entries = [
1660 unescapeHTML(mobj.group('url'))
1661 for mobj in re.finditer(r'''(?x)
1662 (?:
1663 <iframe[^>]+?src=|
1664 data-video-url=|
1665 <embed[^>]+?src=|
1666 embedSWF\(?:\s*|
1667 <object[^>]+data=|
1668 new\s+SWFObject\(
1669 )
1670 (["\'])
1671 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1672 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1673 \1''', webpage)]
1674
1675 # lazyYT YouTube embed
1676 entries.extend(list(map(
1677 unescapeHTML,
1678 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1679
1680 # Wordpress "YouTube Video Importer" plugin
1681 matches = re.findall(r'''(?x)<div[^>]+
1682 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1683 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1684 entries.extend(m[-1] for m in matches)
1685
1686 return entries
1687
1688 @staticmethod
1689 def _extract_url(webpage):
1690 urls = YoutubeIE._extract_urls(webpage)
1691 return urls[0] if urls else None
1692
1693 @classmethod
1694 def extract_id(cls, url):
1695 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1696 if mobj is None:
1697 raise ExtractorError('Invalid URL: %s' % url)
1698 video_id = mobj.group(2)
1699 return video_id
1700
1701 def _extract_chapters_from_json(self, webpage, video_id, duration):
1702 if not webpage:
1703 return
1704 initial_data = self._parse_json(
1705 self._search_regex(
1706 r'window\["ytInitialData"\] = (.+);\n', webpage,
1707 'player args', default='{}'),
1708 video_id, fatal=False)
1709 if not initial_data or not isinstance(initial_data, dict):
1710 return
1711 chapters_list = try_get(
1712 initial_data,
1713 lambda x: x['playerOverlays']
1714 ['playerOverlayRenderer']
1715 ['decoratedPlayerBarRenderer']
1716 ['decoratedPlayerBarRenderer']
1717 ['playerBar']
1718 ['chapteredPlayerBarRenderer']
1719 ['chapters'],
1720 list)
1721 if not chapters_list:
1722 return
1723
1724 def chapter_time(chapter):
1725 return float_or_none(
1726 try_get(
1727 chapter,
1728 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1729 int),
1730 scale=1000)
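# Each chapter ends where the next one starts; the final chapter ends at the
# overall video duration.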
1731 chapters = []
1732 for next_num, chapter in enumerate(chapters_list, start=1):
1733 start_time = chapter_time(chapter)
1734 if start_time is None:
1735 continue
1736 end_time = (chapter_time(chapters_list[next_num])
1737 if next_num < len(chapters_list) else duration)
1738 if end_time is None:
1739 continue
1740 title = try_get(
1741 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1742 compat_str)
1743 chapters.append({
1744 'start_time': start_time,
1745 'end_time': end_time,
1746 'title': title,
1747 })
1748 return chapters
1749
1750 @staticmethod
1751 def _extract_chapters_from_description(description, duration):
1752 if not description:
1753 return None
1754 chapter_lines = re.findall(
1755 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1756 description)
1757 if not chapter_lines:
1758 return None
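# Each matched line carries a seekTo link with an mm:ss or h:mm:ss stamp; start
# times are parsed from these stamps and, as above, each chapter ends where the
# next begins (or at the video duration).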
1759 chapters = []
1760 for next_num, (chapter_line, time_point) in enumerate(
1761 chapter_lines, start=1):
1762 start_time = parse_duration(time_point)
1763 if start_time is None:
1764 continue
1765 if start_time > duration:
1766 break
1767 end_time = (duration if next_num == len(chapter_lines)
1768 else parse_duration(chapter_lines[next_num][1]))
1769 if end_time is None:
1770 continue
1771 if end_time > duration:
1772 end_time = duration
1773 if start_time > end_time:
1774 break
1775 chapter_title = re.sub(
1776 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1777 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1778 chapters.append({
1779 'start_time': start_time,
1780 'end_time': end_time,
1781 'title': chapter_title,
1782 })
1783 return chapters
1784
1785 def _extract_chapters(self, webpage, description, video_id, duration):
1786 return (self._extract_chapters_from_json(webpage, video_id, duration)
1787 or self._extract_chapters_from_description(description, duration))
1788
1789 def _real_extract(self, url):
1790 url, smuggled_data = unsmuggle_url(url, {})
1791
1792 proto = (
1793 'http' if self._downloader.params.get('prefer_insecure', False)
1794 else 'https')
1795
1796 start_time = None
1797 end_time = None
1798 parsed_url = compat_urllib_parse_urlparse(url)
1799 for component in [parsed_url.fragment, parsed_url.query]:
1800 query = compat_parse_qs(component)
1801 if start_time is None and 't' in query:
1802 start_time = parse_duration(query['t'][0])
1803 if start_time is None and 'start' in query:
1804 start_time = parse_duration(query['start'][0])
1805 if end_time is None and 'end' in query:
1806 end_time = parse_duration(query['end'][0])
1807
1808 # Extract the original video URL from a redirecting URL (e.g. age verification) using the next_url parameter
1809 mobj = re.search(self._NEXT_URL_RE, url)
1810 if mobj:
1811 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1812 video_id = self.extract_id(url)
1813
1814 # Get video webpage
1815 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1816 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1817
1818 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1819 video_id = qs.get('v', [None])[0] or video_id
1820
1821 # Attempt to extract SWF player URL
1822 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1823 if mobj is not None:
1824 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1825 else:
1826 player_url = None
1827
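# Collect DASH manifest URLs from both the legacy video_info ('dashmpd') and the
# player_response streamingData ('dashManifestUrl'); duplicates are skipped.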
1828 dash_mpds = []
1829
1830 def add_dash_mpd(video_info):
1831 dash_mpd = video_info.get('dashmpd')
1832 if dash_mpd and dash_mpd[0] not in dash_mpds:
1833 dash_mpds.append(dash_mpd[0])
1834
1835 def add_dash_mpd_pr(pl_response):
1836 dash_mpd = url_or_none(try_get(
1837 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1838 compat_str))
1839 if dash_mpd and dash_mpd not in dash_mpds:
1840 dash_mpds.append(dash_mpd)
1841
1842 is_live = None
1843 view_count = None
1844
1845 def extract_view_count(v_info):
1846 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1847
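# player_response may arrive as a JSON string; parse it once here and feed any
# DASH manifest URL it carries into dash_mpds.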
1848 def extract_player_response(player_response, video_id):
1849 pl_response = str_or_none(player_response)
1850 if not pl_response:
1851 return
1852 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1853 if isinstance(pl_response, dict):
1854 add_dash_mpd_pr(pl_response)
1855 return pl_response
1856
1857 player_response = {}
1858
1859 # Get video info
1860 video_info = {}
1861 embed_webpage = None
1862 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1863 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1864 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1865 age_gate = True
1866 # We simulate access to the video from www.youtube.com/v/{video_id},
1867 # which can be viewed without logging in to Youtube
1868 url = proto + '://www.youtube.com/embed/%s' % video_id
1869 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1870 # Check if the video is only playable on Youtube - if so, it requires auth (cookies)
1871 if re.search(r'player-unavailable">', embed_webpage) is not None:
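# These cookie combinations appear to indicate a logged-in session, which should
# be enough to access the age-gated video without the embed/get_video_info workaround.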
1872 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1873 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1874 age_gate = False
1875 # Try looking directly into the video webpage
1876 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1877 if ytplayer_config:
1878 args = ytplayer_config['args']
1879 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1880 # Convert to the same format returned by compat_parse_qs
1881 video_info = dict((k, [v]) for k, v in args.items())
1882 add_dash_mpd(video_info)
1883 # Rental video is not rented but a preview is available (e.g.
1884 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1885 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1886 if not video_info and args.get('ypc_vid'):
1887 return self.url_result(
1888 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1889 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1890 is_live = True
1891 if not player_response:
1892 player_response = extract_player_response(args.get('player_response'), video_id)
1893 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1894 add_dash_mpd_pr(player_response)
1895 else:
1896 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1897 else:
1898 data = compat_urllib_parse_urlencode({
1899 'video_id': video_id,
1900 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1901 'sts': self._search_regex(
1902 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1903 })
1904 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1905 try:
1906 video_info_webpage = self._download_webpage(
1907 video_info_url, video_id,
1908 note='Refetching age-gated info webpage',
1909 errnote='unable to download video info webpage')
1910 except ExtractorError:
1911 video_info_webpage = None
1912 if video_info_webpage:
1913 video_info = compat_parse_qs(video_info_webpage)
1914 pl_response = video_info.get('player_response', [None])[0]
1915 player_response = extract_player_response(pl_response, video_id)
1916 add_dash_mpd(video_info)
1917 view_count = extract_view_count(video_info)
1918 else:
1919 age_gate = False
1920 # Try looking directly into the video webpage
1921 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1922 if ytplayer_config:
1923 args = ytplayer_config['args']
1924 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1925 # Convert to the same format returned by compat_parse_qs
1926 video_info = dict((k, [v]) for k, v in args.items())
1927 add_dash_mpd(video_info)
1928 # Rental video is not rented but a preview is available (e.g.
1929 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1930 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1931 if not video_info and args.get('ypc_vid'):
1932 return self.url_result(
1933 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1934 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1935 is_live = True
1936 if not player_response:
1937 player_response = extract_player_response(args.get('player_response'), video_id)
1938 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1939 add_dash_mpd_pr(player_response)
1940
1941 def extract_unavailable_message():
1942 messages = []
1943 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1944 msg = self._html_search_regex(
1945 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1946 video_webpage, 'unavailable %s' % kind, default=None)
1947 if msg:
1948 messages.append(msg)
1949 if messages:
1950 return '\n'.join(messages)
1951
1952 if not video_info and not player_response:
1953 unavailable_message = extract_unavailable_message()
1954 if not unavailable_message:
1955 unavailable_message = 'Unable to extract video data'
1956 raise ExtractorError(
1957 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1958
1959 if not isinstance(video_info, dict):
1960 video_info = {}
1961
1962 video_details = try_get(
1963 player_response, lambda x: x['videoDetails'], dict) or {}
1964
1965 microformat = try_get(
1966 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1967
1968 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1969 if not video_title:
1970 self._downloader.report_warning('Unable to extract video title')
1971 video_title = '_'
1972
1973 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1974 if video_description:
1975
1976 def replace_url(m):
1977 redir_url = compat_urlparse.urljoin(url, m.group(1))
1978 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1979 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1980 qs = compat_parse_qs(parsed_redir_url.query)
1981 q = qs.get('q')
1982 if q and q[0]:
1983 return q[0]
1984 return redir_url
1985
1986 description_original = video_description = re.sub(r'''(?x)
1987 <a\s+
1988 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1989 (?:title|href)="([^"]+)"\s+
1990 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1991 class="[^"]*"[^>]*>
1992 [^<]+\.{3}\s*
1993 </a>
1994 ''', replace_url, video_description)
1995 video_description = clean_html(video_description)
1996 else:
1997 video_description = video_details.get('shortDescription')
1998 if video_description is None:
1999 video_description = self._html_search_meta('description', video_webpage)
2000
2001 if not smuggled_data.get('force_singlefeed', False):
2002 if not self._downloader.params.get('noplaylist'):
2003 multifeed_metadata_list = try_get(
2004 player_response,
2005 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2006 compat_str) or try_get(
2007 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
2008 if multifeed_metadata_list:
2009 entries = []
2010 feed_ids = []
2011 for feed in multifeed_metadata_list.split(','):
2012 # Unquoting should take place before splitting on comma (,) since textual
2013 # fields may contain commas as well (see
2014 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2015 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
2016
2017 def feed_entry(name):
2018 return try_get(feed_data, lambda x: x[name][0], compat_str)
2019
2020 feed_id = feed_entry('id')
2021 if not feed_id:
2022 continue
2023 feed_title = feed_entry('title')
2024 title = video_title
2025 if feed_title:
2026 title += ' (%s)' % feed_title
2027 entries.append({
2028 '_type': 'url_transparent',
2029 'ie_key': 'Youtube',
2030 'url': smuggle_url(
2031 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
2032 {'force_singlefeed': True}),
2033 'title': title,
2034 })
2035 feed_ids.append(feed_id)
2036 self.to_screen(
2037 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2038 % (', '.join(feed_ids), video_id))
2039 return self.playlist_result(entries, video_id, video_title, video_description)
2040 else:
2041 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2042
2043 if view_count is None:
2044 view_count = extract_view_count(video_info)
2045 if view_count is None and video_details:
2046 view_count = int_or_none(video_details.get('viewCount'))
2047 if view_count is None and microformat:
2048 view_count = int_or_none(microformat.get('viewCount'))
2049
2050 if is_live is None:
2051 is_live = bool_or_none(video_details.get('isLive'))
2052
2053 has_live_chat_replay = False
2054 if not is_live:
2055 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2056 try:
2057 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2058 has_live_chat_replay = True
2059 except (KeyError, IndexError, TypeError):
2060 pass
2061
2062 # Check for "rental" videos
2063 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2064 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2065
2066 def _extract_filesize(media_url):
2067 return int_or_none(self._search_regex(
2068 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2069
2070 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2071 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2072
2073 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2074 self.report_rtmp_download()
2075 formats = [{
2076 'format_id': '_rtmp',
2077 'protocol': 'rtmp',
2078 'url': video_info['conn'][0],
2079 'player_url': player_url,
2080 }]
2081 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2082 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2083 if 'rtmpe%3Dyes' in encoded_url_map:
2084 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2085 formats = []
2086 formats_spec = {}
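# fmt_list entries appear to look like '<itag>/<width>x<height>[/...]'; use them
# to seed per-itag resolution information.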
2087 fmt_list = video_info.get('fmt_list', [''])[0]
2088 if fmt_list:
2089 for fmt in fmt_list.split(','):
2090 spec = fmt.split('/')
2091 if len(spec) > 1:
2092 width_height = spec[1].split('x')
2093 if len(width_height) == 2:
2094 formats_spec[spec[0]] = {
2095 'resolution': spec[1],
2096 'width': int_or_none(width_height[0]),
2097 'height': int_or_none(width_height[1]),
2098 }
2099 for fmt in streaming_formats:
2100 itag = str_or_none(fmt.get('itag'))
2101 if not itag:
2102 continue
2103 quality = fmt.get('quality')
2104 quality_label = fmt.get('qualityLabel') or quality
2105 formats_spec[itag] = {
2106 'asr': int_or_none(fmt.get('audioSampleRate')),
2107 'filesize': int_or_none(fmt.get('contentLength')),
2108 'format_note': quality_label,
2109 'fps': int_or_none(fmt.get('fps')),
2110 'height': int_or_none(fmt.get('height')),
2111 # bitrate for itag 43 is always 2147483647
2112 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2113 'width': int_or_none(fmt.get('width')),
2114 }
2115
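# Build the format list: entries come from the newer streamingData formats as well
# as the legacy URL-encoded maps; when a format has no direct URL it is reassembled
# from its 'cipher'/'signatureCipher' field.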
2116 for fmt in streaming_formats:
2117 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2118 continue
2119 url = url_or_none(fmt.get('url'))
2120
2121 if not url:
2122 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2123 if not cipher:
2124 continue
2125 url_data = compat_parse_qs(cipher)
2126 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2127 if not url:
2128 continue
2129 else:
2130 cipher = None
2131 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2132
2133 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2134 # Unsupported FORMAT_STREAM_TYPE_OTF
2135 if stream_type == 3:
2136 continue
2137
2138 format_id = fmt.get('itag') or url_data['itag'][0]
2139 if not format_id:
2140 continue
2141 format_id = compat_str(format_id)
2142
2143 if cipher:
2144 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2145 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2146 jsplayer_url_json = self._search_regex(
2147 ASSETS_RE,
2148 embed_webpage if age_gate else video_webpage,
2149 'JS player URL (1)', default=None)
2150 if not jsplayer_url_json and not age_gate:
2151 # We need the embed webpage after all
2152 if embed_webpage is None:
2153 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2154 embed_webpage = self._download_webpage(
2155 embed_url, video_id, 'Downloading embed webpage')
2156 jsplayer_url_json = self._search_regex(
2157 ASSETS_RE, embed_webpage, 'JS player URL')
2158
2159 player_url = json.loads(jsplayer_url_json)
2160 if player_url is None:
2161 player_url_json = self._search_regex(
2162 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2163 video_webpage, 'age gate player URL')
2164 player_url = json.loads(player_url_json)
2165
2166 if 'sig' in url_data:
2167 url += '&signature=' + url_data['sig'][0]
2168 elif 's' in url_data:
2169 encrypted_sig = url_data['s'][0]
2170
2171 if self._downloader.params.get('verbose'):
2172 if player_url is None:
2173 player_desc = 'unknown'
2174 else:
2175 player_type, player_version = self._extract_player_info(player_url)
2176 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2177 parts_sizes = self._signature_cache_id(encrypted_sig)
2178 self.to_screen('{%s} signature length %s, %s' %
2179 (format_id, parts_sizes, player_desc))
2180
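# The decrypted signature is appended under the query parameter named by 'sp',
# falling back to 'signature' for older players.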
2181 signature = self._decrypt_signature(
2182 encrypted_sig, video_id, player_url, age_gate)
2183 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2184 url += '&%s=%s' % (sp, signature)
2185 if 'ratebypass' not in url:
2186 url += '&ratebypass=yes'
2187
2188 dct = {
2189 'format_id': format_id,
2190 'url': url,
2191 'player_url': player_url,
2192 }
2193 if format_id in self._formats:
2194 dct.update(self._formats[format_id])
2195 if format_id in formats_spec:
2196 dct.update(formats_spec[format_id])
2197
2198 # Some itags are not included in the DASH manifest, thus the corresponding
2199 # formats will lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2200 # Try to extract metadata from the url_encoded_fmt_stream_map entry.
2201 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2202 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2203
2204 if width is None:
2205 width = int_or_none(fmt.get('width'))
2206 if height is None:
2207 height = int_or_none(fmt.get('height'))
2208
2209 filesize = int_or_none(url_data.get(
2210 'clen', [None])[0]) or _extract_filesize(url)
2211
2212 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2213 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2214
2215 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2216 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2217 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2218
2219 more_fields = {
2220 'filesize': filesize,
2221 'tbr': tbr,
2222 'width': width,
2223 'height': height,
2224 'fps': fps,
2225 'format_note': quality_label or quality,
2226 }
2227 for key, value in more_fields.items():
2228 if value:
2229 dct[key] = value
2230 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2231 if type_:
2232 type_split = type_.split(';')
2233 kind_ext = type_split[0].split('/')
2234 if len(kind_ext) == 2:
2235 kind, _ = kind_ext
2236 dct['ext'] = mimetype2ext(type_split[0])
2237 if kind in ('audio', 'video'):
2238 codecs = None
2239 for mobj in re.finditer(
2240 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2241 if mobj.group('key') == 'codecs':
2242 codecs = mobj.group('val')
2243 break
2244 if codecs:
2245 dct.update(parse_codecs(codecs))
2246 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2247 dct['downloader_options'] = {
2248 # Youtube throttles chunks >~10M
2249 'http_chunk_size': 10485760,
2250 }
2251 formats.append(dct)
2252 else:
2253 manifest_url = (
2254 url_or_none(try_get(
2255 player_response,
2256 lambda x: x['streamingData']['hlsManifestUrl'],
2257 compat_str))
2258 or url_or_none(try_get(
2259 video_info, lambda x: x['hlsvp'][0], compat_str)))
2260 if manifest_url:
2261 formats = []
2262 m3u8_formats = self._extract_m3u8_formats(
2263 manifest_url, video_id, 'mp4', fatal=False)
2264 for a_format in m3u8_formats:
2265 itag = self._search_regex(
2266 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2267 if itag:
2268 a_format['format_id'] = itag
2269 if itag in self._formats:
2270 dct = self._formats[itag].copy()
2271 dct.update(a_format)
2272 a_format = dct
2273 a_format['player_url'] = player_url
2274 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2275 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2276 if self._downloader.params.get('youtube_include_hls_manifest', True):
2277 formats.append(a_format)
2278 else:
2279 error_message = extract_unavailable_message()
2280 if not error_message:
2281 error_message = clean_html(try_get(
2282 player_response, lambda x: x['playabilityStatus']['reason'],
2283 compat_str))
2284 if not error_message:
2285 error_message = clean_html(
2286 try_get(video_info, lambda x: x['reason'][0], compat_str))
2287 if error_message:
2288 raise ExtractorError(error_message, expected=True)
2289 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2290
2291 # uploader
2292 video_uploader = try_get(
2293 video_info, lambda x: x['author'][0],
2294 compat_str) or str_or_none(video_details.get('author'))
2295 if video_uploader:
2296 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2297 else:
2298 self._downloader.report_warning('unable to extract uploader name')
2299
2300 # uploader_id
2301 video_uploader_id = None
2302 video_uploader_url = None
2303 mobj = re.search(
2304 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2305 video_webpage)
2306 if mobj is not None:
2307 video_uploader_id = mobj.group('uploader_id')
2308 video_uploader_url = mobj.group('uploader_url')
2309 else:
2310 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2311 if owner_profile_url:
2312 video_uploader_id = self._search_regex(
2313 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2314 default=None)
2315 video_uploader_url = owner_profile_url
2316
2317 channel_id = (
2318 str_or_none(video_details.get('channelId'))
2319 or self._html_search_meta(
2320 'channelId', video_webpage, 'channel id', default=None)
2321 or self._search_regex(
2322 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2323 video_webpage, 'channel id', default=None, group='id'))
2324 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2325
2326 thumbnails = []
2327 thumbnails_list = try_get(
2328 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2329 for t in thumbnails_list:
2330 if not isinstance(t, dict):
2331 continue
2332 thumbnail_url = url_or_none(t.get('url'))
2333 if not thumbnail_url:
2334 continue
2335 thumbnails.append({
2336 'url': thumbnail_url,
2337 'width': int_or_none(t.get('width')),
2338 'height': int_or_none(t.get('height')),
2339 })
2340
2341 if not thumbnails:
2342 video_thumbnail = None
2343 # We first try to get a high-quality image:
2344 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2345 video_webpage, re.DOTALL)
2346 if m_thumb is not None:
2347 video_thumbnail = m_thumb.group(1)
2348 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2349 if thumbnail_url:
2350 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2351 if video_thumbnail:
2352 thumbnails.append({'url': video_thumbnail})
2353
2354 # upload date
2355 upload_date = self._html_search_meta(
2356 'datePublished', video_webpage, 'upload date', default=None)
2357 if not upload_date:
2358 upload_date = self._search_regex(
2359 [r'(?s)id="eow-date.*?>(.*?)</span>',
2360 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2361 video_webpage, 'upload date', default=None)
2362 if not upload_date:
2363 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2364 upload_date = unified_strdate(upload_date)
2365
2366 video_license = self._html_search_regex(
2367 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2368 video_webpage, 'license', default=None)
2369
2370 m_music = re.search(
2371 r'''(?x)
2372 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2373 <ul[^>]*>\s*
2374 <li>(?P<title>.+?)
2375 by (?P<creator>.+?)
2376 (?:
2377 \(.+?\)|
2378 <a[^>]*
2379 (?:
2380 \bhref=["\']/red[^>]*>| # drop possible
2381 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2382 )
2383 .*?
2384 )?</li
2385 ''',
2386 video_webpage)
2387 if m_music:
2388 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2389 video_creator = clean_html(m_music.group('creator'))
2390 else:
2391 video_alt_title = video_creator = None
2392
2393 def extract_meta(field):
2394 return self._html_search_regex(
2395 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2396 video_webpage, field, default=None)
2397
2398 track = extract_meta('Song')
2399 artist = extract_meta('Artist')
2400 album = extract_meta('Album')
2401
2402 # Youtube Music Auto-generated description
2403 release_date = release_year = None
2404 if video_description:
2405 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2406 if mobj:
2407 if not track:
2408 track = mobj.group('track').strip()
2409 if not artist:
2410 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2411 if not album:
2412 album = mobj.group('album').strip()
2413 release_year = mobj.group('release_year')
2414 release_date = mobj.group('release_date')
2415 if release_date:
2416 release_date = release_date.replace('-', '')
2417 if not release_year:
2418 release_year = int(release_date[:4])
2419 if release_year:
2420 release_year = int(release_year)
2421
2422 m_episode = re.search(
2423 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2424 video_webpage)
2425 if m_episode:
2426 series = unescapeHTML(m_episode.group('series'))
2427 season_number = int(m_episode.group('season'))
2428 episode_number = int(m_episode.group('episode'))
2429 else:
2430 series = season_number = episode_number = None
2431
2432 m_cat_container = self._search_regex(
2433 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2434 video_webpage, 'categories', default=None)
2435 category = None
2436 if m_cat_container:
2437 category = self._html_search_regex(
2438 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2439 default=None)
2440 if not category:
2441 category = try_get(
2442 microformat, lambda x: x['category'], compat_str)
2443 video_categories = None if category is None else [category]
2444
2445 video_tags = [
2446 unescapeHTML(m.group('content'))
2447 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2448 if not video_tags:
2449 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2450
2451 def _extract_count(count_name):
2452 return str_to_int(self._search_regex(
2453 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2454 % re.escape(count_name),
2455 video_webpage, count_name, default=None))
2456
2457 like_count = _extract_count('like')
2458 dislike_count = _extract_count('dislike')
2459
2460 if view_count is None:
2461 view_count = str_to_int(self._search_regex(
2462 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2463 'view count', default=None))
2464
2465 average_rating = (
2466 float_or_none(video_details.get('averageRating'))
2467 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2468
2469 # subtitles
2470 video_subtitles = self.extract_subtitles(
2471 video_id, video_webpage, has_live_chat_replay)
2472 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2473
2474 video_duration = try_get(
2475 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2476 if not video_duration:
2477 video_duration = int_or_none(video_details.get('lengthSeconds'))
2478 if not video_duration:
2479 video_duration = parse_duration(self._html_search_meta(
2480 'duration', video_webpage, 'video duration'))
2481
2482 # annotations
2483 video_annotations = None
2484 if self._downloader.params.get('writeannotations', False):
2485 xsrf_token = self._search_regex(
2486 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2487 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2488 invideo_url = try_get(
2489 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2490 if xsrf_token and invideo_url:
2491 xsrf_field_name = self._search_regex(
2492 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2493 video_webpage, 'xsrf field name',
2494 group='xsrf_field_name', default='session_token')
2495 video_annotations = self._download_webpage(
2496 self._proto_relative_url(invideo_url),
2497 video_id, note='Downloading annotations',
2498 errnote='Unable to download video annotations', fatal=False,
2499 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2500
2501 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2502
2503 # Look for the DASH manifest
2504 if self._downloader.params.get('youtube_include_dash_manifest', True):
2505 dash_mpd_fatal = True
2506 for mpd_url in dash_mpds:
2507 dash_formats = {}
2508 try:
2509 def decrypt_sig(mobj):
2510 s = mobj.group(1)
2511 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2512 return '/signature/%s' % dec_s
2513
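# Manifest URLs may embed an encrypted signature as a '/s/<sig>' path component;
# replace it with the deciphered '/signature/<sig>' form before fetching the MPD.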
2514 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2515
2516 for df in self._extract_mpd_formats(
2517 mpd_url, video_id, fatal=dash_mpd_fatal,
2518 formats_dict=self._formats):
2519 if not df.get('filesize'):
2520 df['filesize'] = _extract_filesize(df['url'])
2521 # Do not overwrite DASH format found in some previous DASH manifest
2522 if df['format_id'] not in dash_formats:
2523 dash_formats[df['format_id']] = df
2524 # Additional DASH manifests may end up in HTTP Error 403, therefore
2525 # allow them to fail without a bug report message if some DASH
2526 # manifest has already succeeded. This is a temporary workaround to
2527 # reduce the burst of bug reports until we figure out the reason and
2528 # whether it can be fixed at all.
2529 dash_mpd_fatal = False
2530 except (ExtractorError, KeyError) as e:
2531 self.report_warning(
2532 'Skipping DASH manifest: %r' % e, video_id)
2533 if dash_formats:
2534 # Remove the formats found through non-DASH means; they
2535 # contain less info and may be wrong, because we use
2536 # fixed values (for example the resolution). See
2537 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2538 # example.
2539 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2540 formats.extend(dash_formats.values())
2541
2542 # Check for malformed aspect ratio
2543 stretched_m = re.search(
2544 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2545 video_webpage)
2546 if stretched_m:
2547 w = float(stretched_m.group('w'))
2548 h = float(stretched_m.group('h'))
2549 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2550 # We will only process correct ratios.
2551 if w > 0 and h > 0:
2552 ratio = w / h
2553 for f in formats:
2554 if f.get('vcodec') != 'none':
2555 f['stretched_ratio'] = ratio
2556
2557 if not formats:
2558 if 'reason' in video_info:
2559 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2560 regions_allowed = self._html_search_meta(
2561 'regionsAllowed', video_webpage, default=None)
2562 countries = regions_allowed.split(',') if regions_allowed else None
2563 self.raise_geo_restricted(
2564 msg=video_info['reason'][0], countries=countries)
2565 reason = video_info['reason'][0]
2566 if 'Invalid parameters' in reason:
2567 unavailable_message = extract_unavailable_message()
2568 if unavailable_message:
2569 reason = unavailable_message
2570 raise ExtractorError(
2571 'YouTube said: %s' % reason,
2572 expected=True, video_id=video_id)
2573 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2574 raise ExtractorError('This video is DRM protected.', expected=True)
2575
2576 self._sort_formats(formats)
2577
2578 self.mark_watched(video_id, video_info, player_response)
2579
2580 return {
2581 'id': video_id,
2582 'uploader': video_uploader,
2583 'uploader_id': video_uploader_id,
2584 'uploader_url': video_uploader_url,
2585 'channel_id': channel_id,
2586 'channel_url': channel_url,
2587 'upload_date': upload_date,
2588 'license': video_license,
2589 'creator': video_creator or artist,
2590 'title': video_title,
2591 'alt_title': video_alt_title or track,
2592 'thumbnails': thumbnails,
2593 'description': video_description,
2594 'categories': video_categories,
2595 'tags': video_tags,
2596 'subtitles': video_subtitles,
2597 'automatic_captions': automatic_captions,
2598 'duration': video_duration,
2599 'age_limit': 18 if age_gate else 0,
2600 'annotations': video_annotations,
2601 'chapters': chapters,
2602 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2603 'view_count': view_count,
2604 'like_count': like_count,
2605 'dislike_count': dislike_count,
2606 'average_rating': average_rating,
2607 'formats': formats,
2608 'is_live': is_live,
2609 'start_time': start_time,
2610 'end_time': end_time,
2611 'series': series,
2612 'season_number': season_number,
2613 'episode_number': episode_number,
2614 'track': track,
2615 'artist': artist,
2616 'album': album,
2617 'release_date': release_date,
2618 'release_year': release_year,
2619 }
2620
2621
2622 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2623 IE_DESC = 'YouTube.com playlists'
2624 _VALID_URL = r"""(?x)(?:
2625 (?:https?://)?
2626 (?:\w+\.)?
2627 (?:
2628 (?:
2629 youtube(?:kids)?\.com|
2630 invidio\.us
2631 )
2632 /
2633 (?:
2634 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2635 \? (?:.*?[&;])*? (?:p|a|list)=
2636 | p/
2637 )|
2638 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2639 )
2640 (
2641 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2642 # Top tracks, they can also include dots
2643 |(?:MC)[\w\.]*
2644 )
2645 .*
2646 |
2647 (%(playlist_id)s)
2648 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2649 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2650 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2651 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2652 IE_NAME = 'youtube:playlist'
2653 _TESTS = [{
2654 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2655 'info_dict': {
2656 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2657 'uploader': 'Sergey M.',
2658 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2659 'title': 'youtube-dl public playlist',
2660 },
2661 'playlist_count': 1,
2662 }, {
2663 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2664 'info_dict': {
2665 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2666 'uploader': 'Sergey M.',
2667 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2668 'title': 'youtube-dl empty playlist',
2669 },
2670 'playlist_count': 0,
2671 }, {
2672 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2673 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2674 'info_dict': {
2675 'title': '29C3: Not my department',
2676 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2677 'uploader': 'Christiaan008',
2678 'uploader_id': 'ChRiStIaAn008',
2679 },
2680 'playlist_count': 96,
2681 }, {
2682 'note': 'issue #673',
2683 'url': 'PLBB231211A4F62143',
2684 'info_dict': {
2685 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2686 'id': 'PLBB231211A4F62143',
2687 'uploader': 'Wickydoo',
2688 'uploader_id': 'Wickydoo',
2689 },
2690 'playlist_mincount': 26,
2691 }, {
2692 'note': 'Large playlist',
2693 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2694 'info_dict': {
2695 'title': 'Uploads from Cauchemar',
2696 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2697 'uploader': 'Cauchemar',
2698 'uploader_id': 'Cauchemar89',
2699 },
2700 'playlist_mincount': 799,
2701 }, {
2702 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2703 'info_dict': {
2704 'title': 'YDL_safe_search',
2705 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2706 },
2707 'playlist_count': 2,
2708 'skip': 'This playlist is private',
2709 }, {
2710 'note': 'embedded',
2711 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2712 'playlist_count': 4,
2713 'info_dict': {
2714 'title': 'JODA15',
2715 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2716 'uploader': 'milan',
2717 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2718 }
2719 }, {
2720 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2721 'playlist_mincount': 485,
2722 'info_dict': {
2723 'title': '2018 Chinese New Singles (11/6 updated)',
2724 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2725 'uploader': 'LBK',
2726 'uploader_id': 'sdragonfang',
2727 }
2728 }, {
2729 'note': 'Embedded SWF player',
2730 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2731 'playlist_count': 4,
2732 'info_dict': {
2733 'title': 'JODA7',
2734 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2735 },
2736 'skip': 'This playlist does not exist',
2737 }, {
2738 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2739 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2740 'info_dict': {
2741 'title': 'Uploads from Interstellar Movie',
2742 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2743 'uploader': 'Interstellar Movie',
2744 'uploader_id': 'InterstellarMovie1',
2745 },
2746 'playlist_mincount': 21,
2747 }, {
2748 # Playlist URL that does not actually serve a playlist
2749 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2750 'info_dict': {
2751 'id': 'FqZTN594JQw',
2752 'ext': 'webm',
2753 'title': "Smiley's People 01 detective, Adventure Series, Action",
2754 'uploader': 'STREEM',
2755 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2757 'upload_date': '20150526',
2758 'license': 'Standard YouTube License',
2759 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2760 'categories': ['People & Blogs'],
2761 'tags': list,
2762 'view_count': int,
2763 'like_count': int,
2764 'dislike_count': int,
2765 },
2766 'params': {
2767 'skip_download': True,
2768 },
2769 'skip': 'This video is not available.',
2770 'add_ie': [YoutubeIE.ie_key()],
2771 }, {
2772 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2773 'info_dict': {
2774 'id': 'yeWKywCrFtk',
2775 'ext': 'mp4',
2776 'title': 'Small Scale Baler and Braiding Rugs',
2777 'uploader': 'Backus-Page House Museum',
2778 'uploader_id': 'backuspagemuseum',
2779 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2780 'upload_date': '20161008',
2781 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2782 'categories': ['Nonprofits & Activism'],
2783 'tags': list,
2784 'like_count': int,
2785 'dislike_count': int,
2786 },
2787 'params': {
2788 'noplaylist': True,
2789 'skip_download': True,
2790 },
2791 }, {
2792 # https://github.com/ytdl-org/youtube-dl/issues/21844
2793 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2794 'info_dict': {
2795 'title': 'Data Analysis with Dr Mike Pound',
2796 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2797 'uploader_id': 'Computerphile',
2798 'uploader': 'Computerphile',
2799 },
2800 'playlist_mincount': 11,
2801 }, {
2802 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2803 'only_matching': True,
2804 }, {
2805 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2806 'only_matching': True,
2807 }, {
2808 # music album playlist
2809 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2810 'only_matching': True,
2811 }, {
2812 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2813 'only_matching': True,
2814 }, {
2815 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2816 'only_matching': True,
2817 }]
2818
2819 def _real_initialize(self):
2820 self._login()
2821
2822 def extract_videos_from_page(self, page):
2823 ids_in_page = []
2824 titles_in_page = []
2825
2826 for item in re.findall(
2827 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2828 attrs = extract_attributes(item)
2829 video_id = attrs['data-video-id']
2830 video_title = unescapeHTML(attrs.get('data-title'))
2831 if video_title:
2832 video_title = video_title.strip()
2833 ids_in_page.append(video_id)
2834 titles_in_page.append(video_title)
2835
2836 # Fallback with old _VIDEO_RE
2837 self.extract_videos_from_page_impl(
2838 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2839
2840 # Relaxed fallbacks
2841 self.extract_videos_from_page_impl(
2842 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2843 ids_in_page, titles_in_page)
2844 self.extract_videos_from_page_impl(
2845 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2846 ids_in_page, titles_in_page)
2847
2848 return zip(ids_in_page, titles_in_page)
2849
2850 def _extract_mix(self, playlist_id):
2851 # Mixes are generated from a single video;
2852 # the id of the playlist is just 'RD' + video_id
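# e.g. for a (hypothetical) video id dQw4w9WgXcQ the corresponding mix
# playlist id would be RDdQw4w9WgXcQ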
2853 ids = []
2854 last_id = playlist_id[-11:]
2855 for n in itertools.count(1):
2856 url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2857 webpage = self._download_webpage(
2858 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2859 new_ids = orderedSet(re.findall(
2860 r'''(?xs)data-video-username=".*?".*?
2861 href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2862 webpage))
2863 # Fetch new pages until all the videos are repeated; it seems that
2864 # there are always 51 unique videos.
2865 new_ids = [_id for _id in new_ids if _id not in ids]
2866 if not new_ids:
2867 break
2868 ids.extend(new_ids)
2869 last_id = ids[-1]
2870
2871 url_results = self._ids_to_results(ids)
2872
2873 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2874 title_span = (
2875 search_title('playlist-title')
2876 or search_title('title long-title')
2877 or search_title('title'))
2878 title = clean_html(title_span)
2879
2880 return self.playlist_result(url_results, playlist_id, title)
2881
2882 def _extract_playlist(self, playlist_id):
2883 url = self._TEMPLATE_URL % playlist_id
2884 page = self._download_webpage(url, playlist_id)
2885
2886 # the yt-alert-message now has a tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2887 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2888 match = match.strip()
2889 # Check if the playlist exists or is private
2890 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2891 if mobj:
2892 reason = mobj.group('reason')
2893 message = 'This playlist %s' % reason
2894 if 'private' in reason:
2895 message += ', use --username or --netrc to access it'
2896 message += '.'
2897 raise ExtractorError(message, expected=True)
2898 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2899 raise ExtractorError(
2900 'Invalid parameters. Maybe URL is incorrect.',
2901 expected=True)
2902 elif re.match(r'[^<]*Choose your language[^<]*', match):
2903 continue
2904 else:
2905 self.report_warning('Youtube gives an alert message: ' + match)
2906
2907 playlist_title = self._html_search_regex(
2908 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2909 page, 'title', default=None)
2910
2911 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2912 uploader = self._html_search_regex(
2913 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2914 page, 'uploader', default=None)
2915 mobj = re.search(
2916 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2917 page)
2918 if mobj:
2919 uploader_id = mobj.group('uploader_id')
2920 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2921 else:
2922 uploader_id = uploader_url = None
2923
2924 has_videos = True
2925
2926 if not playlist_title:
2927 try:
2928 # Some playlist URLs don't actually serve a playlist (e.g.
2929 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2930 next(self._entries(page, playlist_id))
2931 except StopIteration:
2932 has_videos = False
2933
2934 playlist = self.playlist_result(
2935 self._entries(page, playlist_id), playlist_id, playlist_title)
2936 playlist.update({
2937 'uploader': uploader,
2938 'uploader_id': uploader_id,
2939 'uploader_url': uploader_url,
2940 })
2941
2942 return has_videos, playlist
2943
2944 def _check_download_just_video(self, url, playlist_id):
2945 # Check if it's a video-specific URL
2946 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2947 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2948 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2949 'video id', default=None)
2950 if video_id:
2951 if self._downloader.params.get('noplaylist'):
2952 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2953 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2954 else:
2955 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2956 return video_id, None
2957 return None, None
2958
2959 def _real_extract(self, url):
2960 # Extract playlist id
2961 mobj = re.match(self._VALID_URL, url)
2962 if mobj is None:
2963 raise ExtractorError('Invalid URL: %s' % url)
2964 playlist_id = mobj.group(1) or mobj.group(2)
2965
2966 video_id, video = self._check_download_just_video(url, playlist_id)
2967 if video:
2968 return video
2969
2970 if playlist_id.startswith(('RD', 'UL', 'PU')):
2971 # Mixes require a custom extraction process
2972 return self._extract_mix(playlist_id)
2973
2974 has_videos, playlist = self._extract_playlist(playlist_id)
2975 if has_videos or not video_id:
2976 return playlist
2977
2978 # Some playlist URLs don't actually serve a playlist (see
2979 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2980 # Fallback to plain video extraction if there is a video id
2981 # along with playlist id.
2982 return self.url_result(video_id, 'Youtube', video_id=video_id)
2983
2984
2985 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2986 IE_DESC = 'YouTube.com channels'
2987 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2988 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2989 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2990 IE_NAME = 'youtube:channel'
2991 _TESTS = [{
2992 'note': 'paginated channel',
2993 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2994 'playlist_mincount': 91,
2995 'info_dict': {
2996 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2997 'title': 'Uploads from lex will',
2998 'uploader': 'lex will',
2999 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3000 }
3001 }, {
3002 'note': 'Age restricted channel',
3003 # from https://www.youtube.com/user/DeusExOfficial
3004 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
3005 'playlist_mincount': 64,
3006 'info_dict': {
3007 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
3008 'title': 'Uploads from Deus Ex',
3009 'uploader': 'Deus Ex',
3010 'uploader_id': 'DeusExOfficial',
3011 },
3012 }, {
3013 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
3014 'only_matching': True,
3015 }, {
3016 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
3017 'only_matching': True,
3018 }]
3019
3020 @classmethod
3021 def suitable(cls, url):
3022 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
3023 else super(YoutubeChannelIE, cls).suitable(url))
3024
3025 def _build_template_url(self, url, channel_id):
3026 return self._TEMPLATE_URL % channel_id
3027
3028 def _real_extract(self, url):
3029 channel_id = self._match_id(url)
3030
3031 url = self._build_template_url(url, channel_id)
3032
3033 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
3034 # Work around this by extracting as a playlist if we manage to obtain the channel playlist URL;
3035 # otherwise fall back to channel-by-page extraction
3036 channel_page = self._download_webpage(
3037 url + '?view=57', channel_id,
3038 'Downloading channel page', fatal=False)
3039 if channel_page is False:
3040 channel_playlist_id = False
3041 else:
3042 channel_playlist_id = self._html_search_meta(
3043 'channelId', channel_page, 'channel id', default=None)
3044 if not channel_playlist_id:
3045 channel_url = self._html_search_meta(
3046 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
3047 channel_page, 'channel url', default=None)
3048 if channel_url:
3049 channel_playlist_id = self._search_regex(
3050 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
3051 channel_url, 'channel id', default=None)
3052 if channel_playlist_id and channel_playlist_id.startswith('UC'):
3053 playlist_id = 'UU' + channel_playlist_id[2:]
3054 return self.url_result(
3055 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
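# Illustrative note (a sketch, mirroring the first test above): channel id
# 'UCKfVa3S1e4PHvxWcwyMMg8w' maps to the uploads playlist
# 'UUKfVa3S1e4PHvxWcwyMMg8w', so the URL handed to YoutubePlaylist would be
#   https://www.youtube.com/playlist?list=UUKfVa3S1e4PHvxWcwyMMg8w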
3056
3057 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
3058 autogenerated = re.search(r'''(?x)
3059 class="[^"]*?(?:
3060 channel-header-autogenerated-label|
3061 yt-channel-title-autogenerated
3062 )[^"]*"''', channel_page) is not None
3063
3064 if autogenerated:
3065 # The videos are contained in a single page;
3066 # the AJAX pages can't be used because they are empty
3067 entries = [
3068 self.url_result(
3069 video_id, 'Youtube', video_id=video_id,
3070 video_title=video_title)
3071 for video_id, video_title in self.extract_videos_from_page(channel_page)]
3072 return self.playlist_result(entries, channel_id)
3073
3074 try:
3075 next(self._entries(channel_page, channel_id))
3076 except StopIteration:
3077 alert_message = self._html_search_regex(
3078 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
3079 channel_page, 'alert', default=None, group='alert')
3080 if alert_message:
3081 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
3082
3083 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3084
3085
3086 class YoutubeUserIE(YoutubeChannelIE):
3087 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
3088 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
3089 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
3090 IE_NAME = 'youtube:user'
3091
3092 _TESTS = [{
3093 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
3094 'playlist_mincount': 320,
3095 'info_dict': {
3096 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
3097 'title': 'Uploads from The Linux Foundation',
3098 'uploader': 'The Linux Foundation',
3099 'uploader_id': 'TheLinuxFoundation',
3100 }
3101 }, {
3102 # Only available via https://www.youtube.com/c/12minuteathlete/videos
3103 # but not https://www.youtube.com/user/12minuteathlete/videos
3104 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
3105 'playlist_mincount': 249,
3106 'info_dict': {
3107 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
3108 'title': 'Uploads from 12 Minute Athlete',
3109 'uploader': '12 Minute Athlete',
3110 'uploader_id': 'the12minuteathlete',
3111 }
3112 }, {
3113 'url': 'ytuser:phihag',
3114 'only_matching': True,
3115 }, {
3116 'url': 'https://www.youtube.com/c/gametrailers',
3117 'only_matching': True,
3118 }, {
3119 'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
3120 'only_matching': True,
3121 }, {
3122 'url': 'https://www.youtube.com/gametrailers',
3123 'only_matching': True,
3124 }, {
3125 # This channel is not available; it is geo-restricted to JP
3126 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
3127 'only_matching': True,
3128 }]
3129
3130 @classmethod
3131 def suitable(cls, url):
3132 # Don't return True if the url can be extracted with another youtube
3133 # extractor; the regex is too permissive and would otherwise match.
3134 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
3135 if any(ie.suitable(url) for ie in other_yt_ies):
3136 return False
3137 else:
3138 return super(YoutubeUserIE, cls).suitable(url)
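# Illustrative sketch (expected behaviour, not from the original source):
#   YoutubeUserIE.suitable('https://www.youtube.com/watch?v=BaW_jenozKc')
#       -> False, since YoutubeIE already claims watch URLs
#   YoutubeUserIE.suitable('ytuser:phihag')
#       -> True, as in the 'ytuser:phihag' test above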
3139
3140 def _build_template_url(self, url, channel_id):
3141 mobj = re.match(self._VALID_URL, url)
3142 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
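# Illustrative sketch: 'https://www.youtube.com/c/gametrailers' yields
# user='c', id='gametrailers' and becomes
#   https://www.youtube.com/c/gametrailers/videos
# while 'ytuser:phihag' has no 'user' group and falls back to
#   https://www.youtube.com/user/phihag/videos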
3143
3144
3145 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
3146 IE_DESC = 'YouTube.com live streams'
3147 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
3148 IE_NAME = 'youtube:live'
3149
3150 _TESTS = [{
3151 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3152 'info_dict': {
3153 'id': 'a48o2S1cPoo',
3154 'ext': 'mp4',
3155 'title': 'The Young Turks - Live Main Show',
3156 'uploader': 'The Young Turks',
3157 'uploader_id': 'TheYoungTurks',
3158 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3159 'upload_date': '20150715',
3160 'license': 'Standard YouTube License',
3161 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3162 'categories': ['News & Politics'],
3163 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3164 'like_count': int,
3165 'dislike_count': int,
3166 },
3167 'params': {
3168 'skip_download': True,
3169 },
3170 }, {
3171 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3172 'only_matching': True,
3173 }, {
3174 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3175 'only_matching': True,
3176 }, {
3177 'url': 'https://www.youtube.com/TheYoungTurks/live',
3178 'only_matching': True,
3179 }]
3180
3181 def _real_extract(self, url):
3182 mobj = re.match(self._VALID_URL, url)
3183 channel_id = mobj.group('id')
3184 base_url = mobj.group('base_url')
3185 webpage = self._download_webpage(url, channel_id, fatal=False)
3186 if webpage:
3187 page_type = self._og_search_property(
3188 'type', webpage, 'page type', default='')
3189 video_id = self._html_search_meta(
3190 'videoId', webpage, 'video id', default=None)
3191 if page_type.startswith('video') and video_id and re.match(
3192 r'^[0-9A-Za-z_-]{11}$', video_id):
3193 return self.url_result(video_id, YoutubeIE.ie_key())
3194 return self.url_result(base_url)
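# Illustrative note (a sketch of the flow above, no extra logic): for
# 'https://www.youtube.com/user/TheYoungTurks/live', if the page exposes an
# og:type starting with 'video' and an 11-character 'videoId' meta value,
# that id is delegated to YoutubeIE; otherwise the base URL
# 'https://www.youtube.com/user/TheYoungTurks' is re-dispatched as-is.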
3195
3196
3197 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
3198 IE_DESC = 'YouTube.com user/channel playlists'
3199 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
3200 IE_NAME = 'youtube:playlists'
3201
3202 _TESTS = [{
3203 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3204 'playlist_mincount': 4,
3205 'info_dict': {
3206 'id': 'ThirstForScience',
3207 'title': 'ThirstForScience',
3208 },
3209 }, {
3210 # with "Load more" button
3211 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3212 'playlist_mincount': 70,
3213 'info_dict': {
3214 'id': 'igorkle1',
3215 'title': 'Игорь Клейнер',
3216 },
3217 }, {
3218 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
3219 'playlist_mincount': 17,
3220 'info_dict': {
3221 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
3222 'title': 'Chem Player',
3223 },
3224 'skip': 'Blocked',
3225 }, {
3226 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3227 'only_matching': True,
3228 }]
3229
3230
3231 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
3232 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3233
3234
3235 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
3236 IE_DESC = 'YouTube.com searches'
3237 # There doesn't appear to be a real limit; for example, searching for
3238 # 'python' yields more than 8,000,000 results
3239 _MAX_RESULTS = float('inf')
3240 IE_NAME = 'youtube:search'
3241 _SEARCH_KEY = 'ytsearch'
3242 _EXTRA_QUERY_ARGS = {}
3243 _TESTS = []
3244
3245 def _get_n_results(self, query, n):
3246 """Get a specified number of results for a query"""
3247
3248 videos = []
3249 limit = n
3250
3251 url_query = {
3252 'search_query': query.encode('utf-8'),
3253 }
3254 url_query.update(self._EXTRA_QUERY_ARGS)
3255 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
3256
3257 for pagenum in itertools.count(1):
3258 data = self._download_json(
3259 result_url, video_id='query "%s"' % query,
3260 note='Downloading page %s' % pagenum,
3261 errnote='Unable to download API page',
3262 query={'spf': 'navigate'})
3263 html_content = data[1]['body']['content']
3264
3265 if 'class="search-message' in html_content:
3266 raise ExtractorError(
3267 '[youtube] No video results', expected=True)
3268
3269 new_videos = list(self._process_page(html_content))
3270 videos += new_videos
3271 if not new_videos or len(videos) > limit:
3272 break
3273 next_link = self._html_search_regex(
3274 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
3275 html_content, 'next link', default=None)
3276 if next_link is None:
3277 break
3278 result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
3279
3280 if len(videos) > n:
3281 videos = videos[:n]
3282 return self.playlist_result(videos, query)
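# Illustrative sketch (example values, not from the original code): for the
# query 'youtube-dl test video' the first page request is
#   https://www.youtube.com/results?search_query=youtube-dl+test+video
# fetched with query={'spf': 'navigate'}; data[1]['body']['content'] then
# holds the HTML that _process_page() turns into video entries.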
3283
3284
3285 class YoutubeSearchDateIE(YoutubeSearchIE):
3286 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
3287 _SEARCH_KEY = 'ytsearchdate'
3288 IE_DESC = 'YouTube.com searches, newest videos first'
3289 _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
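# Illustrative note: with these extra args the first results URL would look
# like (parameter order may differ, as it comes from a dict)
#   https://www.youtube.com/results?search_query=<query>&search_sort=video_date_uploaded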
3290
3291
3292 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
3293 IE_DESC = 'YouTube.com search URLs'
3294 IE_NAME = 'youtube:search_url'
3295 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
3296 _TESTS = [{
3297 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3298 'playlist_mincount': 5,
3299 'info_dict': {
3300 'title': 'youtube-dl test video',
3301 }
3302 }, {
3303 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3304 'only_matching': True,
3305 }]
3306
3307 def _real_extract(self, url):
3308 mobj = re.match(self._VALID_URL, url)
3309 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
3310 webpage = self._download_webpage(url, query)
3311 return self.playlist_result(self._process_page(webpage), playlist_title=query)
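# Illustrative sketch (mirrors the first test above): for
# '...results?baz=bar&search_query=youtube-dl+test+video&filters=video...'
# mobj.group('query') is 'youtube-dl+test+video' and unquote_plus() turns it
# into the playlist title 'youtube-dl test video'.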
3312
3313
3314 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
3315 IE_DESC = 'YouTube.com (multi-season) shows'
3316 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3317 IE_NAME = 'youtube:show'
3318 _TESTS = [{
3319 'url': 'https://www.youtube.com/show/airdisasters',
3320 'playlist_mincount': 5,
3321 'info_dict': {
3322 'id': 'airdisasters',
3323 'title': 'Air Disasters',
3324 }
3325 }]
3326
3327 def _real_extract(self, url):
3328 playlist_id = self._match_id(url)
3329 return super(YoutubeShowIE, self)._real_extract(
3330 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3331
3332
3333 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
3334 """
3335 Base class for feed extractors.
3336 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
3337 """
3338 _LOGIN_REQUIRED = True
3339
3340 @property
3341 def IE_NAME(self):
3342 return 'youtube:%s' % self._FEED_NAME
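# Illustrative sketch (mirrors the subclasses defined further below): a feed
# extractor only needs the two class attributes, e.g.
#   class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
#       _FEED_NAME = 'recommended'
#       _PLAYLIST_TITLE = 'Youtube Recommended videos'
# which makes IE_NAME 'youtube:recommended' and points _real_extract() at
# https://www.youtube.com/feed/recommended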
3343
3344 def _real_initialize(self):
3345 self._login()
3346
3347 def _entries(self, page):
3348 # The extraction process is the same as for playlists, but the regex
3349 # for the video ids doesn't contain an index
3350 ids = []
3351 more_widget_html = content_html = page
3352 for page_num in itertools.count(1):
3353 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3354
3355 # The 'recommended' feed has an infinite 'load more' and each new portion
3356 # returns (sometimes) the same videos in a slightly different order, so we
3357 # check for uniqueness and break when a portion contains no new videos
3358 new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
3359 if not new_ids:
3360 break
3361
3362 ids.extend(new_ids)
3363
3364 for entry in self._ids_to_results(new_ids):
3365 yield entry
3366
3367 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3368 if not mobj:
3369 break
3370
3371 more = self._download_json(
3372 'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3373 'Downloading page #%s' % page_num,
3374 transform_source=uppercase_escape,
3375 headers=self._YOUTUBE_CLIENT_HEADERS)
3376 content_html = more['content_html']
3377 more_widget_html = more['load_more_widget_html']
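# Illustrative sketch of the de-duplication above (hypothetical ids):
#   ids = ['id_a', 'id_b']; matches = ['id_b', 'id_c', 'id_b']
#   orderedSet(matches)                 # -> ['id_b', 'id_c']
#   new_ids after the filter            # -> ['id_c']
# An empty new_ids means the feed has started repeating itself, so the
# pagination loop stops.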
3378
3379 def _real_extract(self, url):
3380 page = self._download_webpage(
3381 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3382 self._PLAYLIST_TITLE)
3383 return self.playlist_result(
3384 self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3385
3386
3387 class YoutubeWatchLaterIE(YoutubePlaylistIE):
3388 IE_NAME = 'youtube:watchlater'
3389 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3390 _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
3391
3392 _TESTS = [{
3393 'url': 'https://www.youtube.com/playlist?list=WL',
3394 'only_matching': True,
3395 }, {
3396 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3397 'only_matching': True,
3398 }]
3399
3400 def _real_extract(self, url):
3401 _, video = self._check_download_just_video(url, 'WL')
3402 if video:
3403 return video
3404 _, playlist = self._extract_playlist('WL')
3405 return playlist
3406
3407
3408 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3409 IE_NAME = 'youtube:favorites'
3410 IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3411 _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
3412 _LOGIN_REQUIRED = True
3413
3414 def _real_extract(self, url):
3415 webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
3416 playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
3417 return self.url_result(playlist_id, 'YoutubePlaylist')
3418
3419
3420 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3421 IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3422 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3423 _FEED_NAME = 'recommended'
3424 _PLAYLIST_TITLE = 'Youtube Recommended videos'
3425
3426
3427 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3428 IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
3429 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3430 _FEED_NAME = 'subscriptions'
3431 _PLAYLIST_TITLE = 'Youtube Subscriptions'
3432
3433
3434 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3435 IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3436 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3437 _FEED_NAME = 'history'
3438 _PLAYLIST_TITLE = 'Youtube History'
3439
3440
3441 class YoutubeTruncatedURLIE(InfoExtractor):
3442 IE_NAME = 'youtube:truncated_url'
3443 IE_DESC = False # Do not list
3444 _VALID_URL = r'''(?x)
3445 (?:https?://)?
3446 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3447 (?:watch\?(?:
3448 feature=[a-z_]+|
3449 annotation_id=annotation_[^&]+|
3450 x-yt-cl=[0-9]+|
3451 hl=[^&]*|
3452 t=[0-9]+
3453 )?
3454 |
3455 attribution_link\?a=[^&]+
3456 )
3457 $
3458 '''
3459
3460 _TESTS = [{
3461 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3462 'only_matching': True,
3463 }, {
3464 'url': 'https://www.youtube.com/watch?',
3465 'only_matching': True,
3466 }, {
3467 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3468 'only_matching': True,
3469 }, {
3470 'url': 'https://www.youtube.com/watch?feature=foo',
3471 'only_matching': True,
3472 }, {
3473 'url': 'https://www.youtube.com/watch?hl=en-GB',
3474 'only_matching': True,
3475 }, {
3476 'url': 'https://www.youtube.com/watch?t=2372',
3477 'only_matching': True,
3478 }]
3479
3480 def _real_extract(self, url):
3481 raise ExtractorError(
3482 'Did you forget to quote the URL? Remember that & is a meta '
3483 'character in most shells, so you want to put the URL in quotes, '
3484 'like youtube-dl '
3485 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3486 ' or simply youtube-dl BaW_jenozKc .',
3487 expected=True)
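# Illustrative note (a sketch, no extra logic): an unquoted shell invocation
# like
#   youtube-dl https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc
# is split by the shell at '&', so the extractor only ever sees
# 'https://www.youtube.com/watch?feature=foo'; such URLs are matched here
# only to raise the hint above.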
3488
3489
3490 class YoutubeTruncatedIDIE(InfoExtractor):
3491 IE_NAME = 'youtube:truncated_id'
3492 IE_DESC = False # Do not list
3493 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3494
3495 _TESTS = [{
3496 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3497 'only_matching': True,
3498 }]
3499
3500 def _real_extract(self, url):
3501 video_id = self._match_id(url)
3502 raise ExtractorError(
3503 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
3504 expected=True)