youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     bool_or_none,
  31     clean_html,
  32     error_to_compat_str,
  33     extract_attributes,
  34     ExtractorError,
  35     float_or_none,
  36     get_element_by_attribute,
  37     get_element_by_id,
  38     int_or_none,
  39     mimetype2ext,
  40     orderedSet,
  41     parse_codecs,
  42     parse_count,
  43     parse_duration,
  44     remove_quotes,
  45     remove_start,
  46     smuggle_url,
  47     str_or_none,
  48     str_to_int,
  49     try_get,
  50     unescapeHTML,
  51     unified_strdate,
  52     unsmuggle_url,
  53     uppercase_escape,
  54     url_or_none,
  55     urlencode_postdata,
  56 )
  57
  58
  59 class YoutubeBaseInfoExtractor(InfoExtractor):
  60     """Provide base functions for Youtube extractors"""
  61     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  62     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  63
  64     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  65     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  66     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  67
  68     _NETRC_MACHINE = 'youtube'
  69     # If True it will raise an error if no login info is provided
  70     _LOGIN_REQUIRED = False
  71
  72     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  73
  74     _YOUTUBE_CLIENT_HEADERS = {
  75         'x-youtube-client-name': '1',
  76         'x-youtube-client-version': '1.20200609.04.02',
  77     }
  78
  79     def _set_language(self):
  80         self._set_cookie(
  81             '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
  82             # YouTube sets the expire time to about two months
  83             expire_time=time.time() + 2 * 30 * 24 * 3600)
  84
  85     def _ids_to_results(self, ids):
  86         return [
  87             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  88             for vid_id in ids]
  89
  90     def _login(self):
  91         """
  92         Attempt to log in to YouTube.
  93         True is returned if successful or skipped.
  94         False is returned if login failed.
  95
  96         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  97         """
  98         username, password = self._get_login_info()
  99         # No authentication to be performed
 100         if username is None:
 101             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
 102                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 103             return True
 104
 105         login_page = self._download_webpage(
 106             self._LOGIN_URL, None,
 107             note='Downloading login page',
 108             errnote='unable to fetch login page', fatal=False)
 109         if login_page is False:
 110             return
 111
 112         login_form = self._hidden_inputs(login_page)
 113
 114         def req(url, f_req, note, errnote):
 115             data = login_form.copy()
 116             data.update({
 117                 'pstMsg': 1,
 118                 'checkConnection': 'youtube',
 119                 'checkedDomains': 'youtube',
 120                 'hl': 'en',
 121                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 122                 'f.req': json.dumps(f_req),
 123                 'flowName': 'GlifWebSignIn',
 124                 'flowEntry': 'ServiceLogin',
 125                 # TODO: reverse actual botguard identifier generation algo
 126                 'bgRequest': '["identifier",""]',
 127             })
 128             return self._download_json(
 129                 url, None, note=note, errnote=errnote,
 130                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 131                 fatal=False,
 132                 data=urlencode_postdata(data), headers={
 133                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 134                     'Google-Accounts-XSRF': 1,
 135                 })
 136
 137         def warn(message):
 138             self._downloader.report_warning(message)
 139
 140         lookup_req = [
 141             username,
 142             None, [], None, 'US', None, None, 2, False, True,
 143             [
 144                 None, None,
 145                 [2, 1, None, 1,
 146                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 147                  None, [], 4],
 148                 1, [None, None, []], None, None, None, True
 149             ],
 150             username,
 151         ]
 152
 153         lookup_results = req(
 154             self._LOOKUP_URL, lookup_req,
 155             'Looking up account info', 'Unable to look up account info')
 156
 157         if lookup_results is False:
 158             return False
 159
 160         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 161         if not user_hash:
 162             warn('Unable to extract user hash')
 163             return False
 164
 165         challenge_req = [
 166             user_hash,
 167             None, 1, None, [1, None, None, None, [password, None, True]],
 168             [
 169                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 170                 1, [None, None, []], None, None, None, True
 171             ]]
 172
 173         challenge_results = req(
 174             self._CHALLENGE_URL, challenge_req,
 175             'Logging in', 'Unable to log in')
 176
 177         if challenge_results is False:
 178             return
 179
 180         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 181         if login_res:
 182             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 183             warn(
 184                 'Unable to login: %s' % 'Invalid password'
 185                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 186             return False
 187
 188         res = try_get(challenge_results, lambda x: x[0][-1], list)
 189         if not res:
 190             warn('Unable to extract result entry')
 191             return False
 192
 193         login_challenge = try_get(res, lambda x: x[0][0], list)
 194         if login_challenge:
 195             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 196             if challenge_str == 'TWO_STEP_VERIFICATION':
 197                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 198                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 199                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 200                 if status == 'QUOTA_EXCEEDED':
 201                     warn('Exceeded the limit of TFA codes, try later')
 202                     return False
 203
 204                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 205                 if not tl:
 206                     warn('Unable to extract TL')
 207                     return False
 208
 209                 tfa_code = self._get_tfa_info('2-step verification code')
 210
 211                 if not tfa_code:
 212                     warn(
 213                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 214                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 215                     return False
 216
 217                 tfa_code = remove_start(tfa_code, 'G-')
 218
 219                 tfa_req = [
 220                     user_hash, None, 2, None,
 221                     [
 222                         9, None, None, None, None, None, None, None,
 223                         [None, tfa_code, True, 2]
 224                     ]]
 225
 226                 tfa_results = req(
 227                     self._TFA_URL.format(tl), tfa_req,
 228                     'Submitting TFA code', 'Unable to submit TFA code')
 229
 230                 if tfa_results is False:
 231                     return False
 232
 233                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 234                 if tfa_res:
 235                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 236                     warn(
 237                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 238                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 239                     return False
 240
 241                 check_cookie_url = try_get(
 242                     tfa_results, lambda x: x[0][-1][2], compat_str)
 243             else:
 244                 CHALLENGES = {
 245                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 246                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 247                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 248                 }
 249                 challenge = CHALLENGES.get(
 250                     challenge_str,
 251                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 252                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 253                 return False
 254         else:
 255             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 256
 257         if not check_cookie_url:
 258             warn('Unable to extract CheckCookie URL')
 259             return False
 260
 261         check_cookie_results = self._download_webpage(
 262             check_cookie_url, None, 'Checking cookie', fatal=False)
 263
 264         if check_cookie_results is False:
 265             return False
 266
 267         if 'https://myaccount.google.com/' not in check_cookie_results:
 268             warn('Unable to log in')
 269             return False
 270
 271         return True
 272
 273     def _download_webpage_handle(self, *args, **kwargs):
 274         query = kwargs.get('query', {}).copy()
 275         query['disable_polymer'] = 'true'
 276         kwargs['query'] = query
 277         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 278             *args, **compat_kwargs(kwargs))
 279
 280     def _real_initialize(self):
 281         if self._downloader is None:
 282             return
 283         self._set_language()
 284         if not self._login():
 285             return
 286
 287
 288 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 289     # Extract entries from page with "Load more" button
 290     def _entries(self, page, playlist_id):
 291         more_widget_html = content_html = page
 292         for page_num in itertools.count(1):
 293             for entry in self._process_page(content_html):
 294                 yield entry
 295
 296             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 297             if not mobj:
 298                 break
 299
 300             count = 0
 301             retries = 3
 302             while count <= retries:
 303                 try:
 304                     # Downloading page may result in intermittent 5xx HTTP error
 305                     # that is usually worked around with a retry
 306                     more = self._download_json(
 307                         'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
 308                         'Downloading page #%s%s'
 309                         % (page_num, ' (retry #%d)' % count if count else ''),
 310                         transform_source=uppercase_escape,
 311                         headers=self._YOUTUBE_CLIENT_HEADERS)
 312                     break
 313                 except ExtractorError as e:
 314                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
 315                         count += 1
 316                         if count <= retries:
 317                             continue
 318                     raise
 319
 320             content_html = more['content_html']
 321             if not content_html.strip():
 322                 # Some webpages show a "Load more" button but they don't
 323                 # have more videos
 324                 break
 325             more_widget_html = more['load_more_widget_html']
 326
 327
 328 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 329     def _process_page(self, content):
 330         for video_id, video_title in self.extract_videos_from_page(content):
 331             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 332
 333     def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
 334         for mobj in re.finditer(video_re, page):
 335             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 336             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 337                 continue
 338             video_id = mobj.group('id')
 339             video_title = unescapeHTML(
 340                 mobj.group('title')) if 'title' in mobj.groupdict() else None
 341             if video_title:
 342                 video_title = video_title.strip()
 343             if video_title == '► Play all':
 344                 video_title = None
 345             try:
 346                 idx = ids_in_page.index(video_id)
 347                 if video_title and not titles_in_page[idx]:
 348                     titles_in_page[idx] = video_title
 349             except ValueError:
 350                 ids_in_page.append(video_id)
 351                 titles_in_page.append(video_title)
 352
 353     def extract_videos_from_page(self, page):
 354         ids_in_page = []
 355         titles_in_page = []
 356         self.extract_videos_from_page_impl(
 357             self._VIDEO_RE, page, ids_in_page, titles_in_page)
 358         return zip(ids_in_page, titles_in_page)
 359
 360
 361 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 362     def _process_page(self, content):
 363         for playlist_id in orderedSet(re.findall(
 364                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 365                 content)):
 366             yield self.url_result(
 367                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 368
 369     def _real_extract(self, url):
 370         playlist_id = self._match_id(url)
 371         webpage = self._download_webpage(url, playlist_id)
 372         title = self._og_search_title(webpage, fatal=False)
 373         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 374
 375
 376 class YoutubeIE(YoutubeBaseInfoExtractor):
 377     IE_DESC = 'YouTube.com'
 378     _VALID_URL = r"""(?x)^
 379                      (
 380                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 381                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
 382                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 383                             (?:www\.)?pwnyoutube\.com/|
 384                             (?:www\.)?hooktube\.com/|
 385                             (?:www\.)?yourepeat\.com/|
 386                             tube\.majestyc\.net/|
 387                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
 388                             (?:(?:www|dev)\.)?invidio\.us/|
 389                             (?:(?:www|no)\.)?invidiou\.sh/|
 390                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
 391                             (?:www\.)?invidious\.kabi\.tk/|
 392                             (?:www\.)?invidious\.13ad\.de/|
 393                             (?:www\.)?invidious\.mastodon\.host/|
 394                             (?:www\.)?invidious\.nixnet\.xyz/|
 395                             (?:www\.)?invidious\.drycat\.fr/|
 396                             (?:www\.)?tube\.poal\.co/|
 397                             (?:www\.)?vid\.wxzm\.sx/|
 398                             (?:www\.)?yewtu\.be/|
 399                             (?:www\.)?yt\.elukerio\.org/|
 400                             (?:www\.)?yt\.lelux\.fi/|
 401                             (?:www\.)?invidious\.ggc-project\.de/|
 402                             (?:www\.)?yt\.maisputain\.ovh/|
 403                             (?:www\.)?invidious\.13ad\.de/|
 404                             (?:www\.)?invidious\.toot\.koeln/|
 405                             (?:www\.)?invidious\.fdn\.fr/|
 406                             (?:www\.)?watch\.nettohikari\.com/|
 407                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
 408                             (?:www\.)?qklhadlycap4cnod\.onion/|
 409                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
 410                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
 411                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
 412                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
 413                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
 414                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
 415                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 416                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 417                          (?:                                                  # the various things that can precede the ID:
 418                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 419                              |(?:                                             # or the v= param in all its forms
 420                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 421                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 422                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 423                                  v=
 424                              )
 425                          ))
 426                          |(?:
 427                             youtu\.be|                                        # just youtu.be/xxxx
 428                             vid\.plus|                                        # or vid.plus/xxxx
 429                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 430                          )/
 431                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 432                          )
 433                      )?                                                       # all until now is optional -> you can pass the naked ID
 434                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 435                      (?!.*?\blist=
 436                         (?:
 437                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 438                             WL                                                # WL are handled by the watch later IE
 439                         )
 440                      )
 441                      (?(1).+)?                                                # if we found the ID, everything can follow
 442                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 443     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 444     _PLAYER_INFO_RE = (
 445         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 446         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 447     )
 448     _formats = {
 449         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 450         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 451         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 452         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 453         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 454         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 455         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 456         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 457         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 458         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 459         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 460         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 461         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 462         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 463         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 464         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 465         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 466         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 467
 468
 469         # 3D videos
 470         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 471         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 472         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 473         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 474         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 475         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 476         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 477
 478         # Apple HTTP Live Streaming
 479         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 480         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 481         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 482         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 483         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 484         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 485         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 486         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 487
 488         # DASH mp4 video
 489         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 490         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 491         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 492         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 493         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 494         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 495         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 496         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 497         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 498         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 499         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 500         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 501
 502         # Dash mp4 audio
 503         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 504         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 505         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 506         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 507         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 508         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 509         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 510
 511         # Dash webm
 512         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 513         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 514         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 515         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 516         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 517         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 518         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 519         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 520         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 521         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 522         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 523         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 524         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 525         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 526         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 527         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 528         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 529         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 530         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 531         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 532         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 533         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 534
 535         # Dash webm audio
 536         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 537         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 538
 539         # Dash webm audio with opus inside
 540         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 541         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 542         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 543
 544         # RTMP (unnamed)
 545         '_rtmp': {'protocol': 'rtmp'},
 546
 547         # av01 video only formats sometimes served with "unknown" codecs
 548         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 549         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 550         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 551         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 552     }
 553     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 554
 555     _GEO_BYPASS = False
 556
 557     IE_NAME = 'youtube'
 558     _TESTS = [
 559         {
 560             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 561             'info_dict': {
 562                 'id': 'BaW_jenozKc',
 563                 'ext': 'mp4',
 564                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 565                 'uploader': 'Philipp Hagemeister',
 566                 'uploader_id': 'phihag',
 567                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 568                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 569                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 570                 'upload_date': '20121002',
 571                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 572                 'categories': ['Science & Technology'],
 573                 'tags': ['youtube-dl'],
 574                 'duration': 10,
 575                 'view_count': int,
 576                 'like_count': int,
 577                 'dislike_count': int,
 578                 'start_time': 1,
 579                 'end_time': 9,
 580             }
 581         },
 582         {
 583             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 584             'note': 'Test generic use_cipher_signature video (#897)',
 585             'info_dict': {
 586                 'id': 'UxxajLWwzqY',
 587                 'ext': 'mp4',
 588                 'upload_date': '20120506',
 589                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 590                 'alt_title': 'I Love It (feat. Charli XCX)',
 591                 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
 592                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 593                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 594                          'iconic ep', 'iconic', 'love', 'it'],
 595                 'duration': 180,
 596                 'uploader': 'Icona Pop',
 597                 'uploader_id': 'IconaPop',
 598                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 599                 'creator': 'Icona Pop',
 600                 'track': 'I Love It (feat. Charli XCX)',
 601                 'artist': 'Icona Pop',
 602             }
 603         },
 604         {
 605             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 606             'note': 'Test VEVO video with age protection (#956)',
 607             'info_dict': {
 608                 'id': '07FYdnEawAQ',
 609                 'ext': 'mp4',
 610                 'upload_date': '20130703',
 611                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
 612                 'alt_title': 'Tunnel Vision',
 613                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
 614                 'duration': 419,
 615                 'uploader': 'justintimberlakeVEVO',
 616                 'uploader_id': 'justintimberlakeVEVO',
 617                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 618                 'creator': 'Justin Timberlake',
 619                 'track': 'Tunnel Vision',
 620                 'artist': 'Justin Timberlake',
 621                 'age_limit': 18,
 622             }
 623         },
 624         {
 625             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 626             'note': 'Embed-only video (#1746)',
 627             'info_dict': {
 628                 'id': 'yZIXLfi8CZQ',
 629                 'ext': 'mp4',
 630                 'upload_date': '20120608',
 631                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 632                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 633                 'uploader': 'SET India',
 634                 'uploader_id': 'setindia',
 635                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 636                 'age_limit': 18,
 637             }
 638         },
 639         {
 640             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 641             'note': 'Use the first video ID in the URL',
 642             'info_dict': {
 643                 'id': 'BaW_jenozKc',
 644                 'ext': 'mp4',
 645                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 646                 'uploader': 'Philipp Hagemeister',
 647                 'uploader_id': 'phihag',
 648                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 649                 'upload_date': '20121002',
 650                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 651                 'categories': ['Science & Technology'],
 652                 'tags': ['youtube-dl'],
 653                 'duration': 10,
 654                 'view_count': int,
 655                 'like_count': int,
 656                 'dislike_count': int,
 657             },
 658             'params': {
 659                 'skip_download': True,
 660             },
 661         },
 662         {
 663             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 664             'note': '256k DASH audio (format 141) via DASH manifest',
 665             'info_dict': {
 666                 'id': 'a9LDPn-MO4I',
 667                 'ext': 'm4a',
 668                 'upload_date': '20121002',
 669                 'uploader_id': '8KVIDEO',
 670                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 671                 'description': '',
 672                 'uploader': '8KVIDEO',
 673                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 674             },
 675             'params': {
 676                 'youtube_include_dash_manifest': True,
 677                 'format': '141',
 678             },
 679             'skip': 'format 141 not served anymore',
 680         },
 681         # DASH manifest with encrypted signature
 682         {
 683             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 684             'info_dict': {
 685                 'id': 'IB3lcPjvWLA',
 686                 'ext': 'm4a',
 687                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 688                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 689                 'duration': 244,
 690                 'uploader': 'AfrojackVEVO',
 691                 'uploader_id': 'AfrojackVEVO',
 692                 'upload_date': '20131011',
 693             },
 694             'params': {
 695                 'youtube_include_dash_manifest': True,
 696                 'format': '141/bestaudio[ext=m4a]',
 697             },
 698         },
 699         # JS player signature function name containing $
 700         {
 701             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 702             'info_dict': {
 703                 'id': 'nfWlot6h_JM',
 704                 'ext': 'm4a',
 705                 'title': 'Taylor Swift - Shake It Off',
 706                 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
 707                 'duration': 242,
 708                 'uploader': 'TaylorSwiftVEVO',
 709                 'uploader_id': 'TaylorSwiftVEVO',
 710                 'upload_date': '20140818',
 711             },
 712             'params': {
 713                 'youtube_include_dash_manifest': True,
 714                 'format': '141/bestaudio[ext=m4a]',
 715             },
 716         },
 717         # Controversy video
 718         {
 719             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 720             'info_dict': {
 721                 'id': 'T4XJQO3qol8',
 722                 'ext': 'mp4',
 723                 'duration': 219,
 724                 'upload_date': '20100909',
 725                 'uploader': 'Amazing Atheist',
 726                 'uploader_id': 'TheAmazingAtheist',
 727                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 728                 'title': 'Burning Everyone\'s Koran',
 729                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 730             }
 731         },
 732         # Normal age-gate video (No vevo, embed allowed)
 733         {
 734             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 735             'info_dict': {
 736                 'id': 'HtVdAasjOgU',
 737                 'ext': 'mp4',
 738                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 739                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 740                 'duration': 142,
 741                 'uploader': 'The Witcher',
 742                 'uploader_id': 'WitcherGame',
 743                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 744                 'upload_date': '20140605',
 745                 'age_limit': 18,
 746             },
 747         },
 748         # Age-gate video with encrypted signature
 749         {
 750             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 751             'info_dict': {
 752                 'id': '6kLq3WMV1nU',
 753                 'ext': 'mp4',
 754                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 755                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 756                 'duration': 246,
 757                 'uploader': 'LloydVEVO',
 758                 'uploader_id': 'LloydVEVO',
 759                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 760                 'upload_date': '20110629',
 761                 'age_limit': 18,
 762             },
 763         },
 764         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 765         # YouTube Red ad is not captured for creator
 766         {
 767             'url': '__2ABJjxzNo',
 768             'info_dict': {
 769                 'id': '__2ABJjxzNo',
 770                 'ext': 'mp4',
 771                 'duration': 266,
 772                 'upload_date': '20100430',
 773                 'uploader_id': 'deadmau5',
 774                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 775                 'creator': 'Dada Life, deadmau5',
 776                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 777                 'uploader': 'deadmau5',
 778                 'title': 'Deadmau5 - Some Chords (HD)',
 779                 'alt_title': 'This Machine Kills Some Chords',
 780             },
 781             'expected_warnings': [
 782                 'DASH manifest missing',
 783             ]
 784         },
 785         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 786         {
 787             'url': 'lqQg6PlCWgI',
 788             'info_dict': {
 789                 'id': 'lqQg6PlCWgI',
 790                 'ext': 'mp4',
 791                 'duration': 6085,
 792                 'upload_date': '20150827',
 793                 'uploader_id': 'olympic',
 794                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 795                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 796                 'uploader': 'Olympic',
 797                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 798             },
 799             'params': {
 800                 'skip_download': 'requires avconv',
 801             }
 802         },
 803         # Non-square pixels
 804         {
 805             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 806             'info_dict': {
 807                 'id': '_b-2C3KPAM0',
 808                 'ext': 'mp4',
 809                 'stretched_ratio': 16 / 9.,
 810                 'duration': 85,
 811                 'upload_date': '20110310',
 812                 'uploader_id': 'AllenMeow',
 813                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 814                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 815                 'uploader': '孫ᄋᄅ',
 816                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 817             },
 818         },
 819         # url_encoded_fmt_stream_map is empty string
 820         {
 821             'url': 'qEJwOuvDf7I',
 822             'info_dict': {
 823                 'id': 'qEJwOuvDf7I',
 824                 'ext': 'webm',
 825                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 826                 'description': '',
 827                 'upload_date': '20150404',
 828                 'uploader_id': 'spbelect',
 829                 'uploader': 'Наблюдатели Петербурга',
 830             },
 831             'params': {
 832                 'skip_download': 'requires avconv',
 833             },
 834             'skip': 'This live event has ended.',
 835         },
 836         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 837         {
 838             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 839             'info_dict': {
 840                 'id': 'FIl7x6_3R5Y',
 841                 'ext': 'webm',
 842                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 843                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 844                 'duration': 220,
 845                 'upload_date': '20150625',
 846                 'uploader_id': 'dorappi2000',
 847                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 848                 'uploader': 'dorappi2000',
 849                 'formats': 'mincount:31',
 850             },
 851             'skip': 'not actual anymore',
 852         },
 853         # DASH manifest with segment_list
 854         {
 855             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 856             'md5': '8ce563a1d667b599d21064e982ab9e31',
 857             'info_dict': {
 858                 'id': 'CsmdDsKjzN8',
 859                 'ext': 'mp4',
 860                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 861                 'uploader': 'Airtek',
 862                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 863                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 864                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 865             },
 866             'params': {
 867                 'youtube_include_dash_manifest': True,
 868                 'format': '135',  # bestvideo
 869             },
 870             'skip': 'This live event has ended.',
 871         },
 872         {
 873             # Multifeed videos (multiple cameras), URL is for Main Camera
 874             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 875             'info_dict': {
 876                 'id': 'jqWvoWXjCVs',
 877                 'title': 'teamPGP: Rocket League Noob Stream',
 878                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 879             },
 880             'playlist': [{
 881                 'info_dict': {
 882                     'id': 'jqWvoWXjCVs',
 883                     'ext': 'mp4',
 884                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 885                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 886                     'duration': 7335,
 887                     'upload_date': '20150721',
 888                     'uploader': 'Beer Games Beer',
 889                     'uploader_id': 'beergamesbeer',
 890                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 891                     'license': 'Standard YouTube License',
 892                 },
 893             }, {
 894                 'info_dict': {
 895                     'id': '6h8e8xoXJzg',
 896                     'ext': 'mp4',
 897                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 898                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 899                     'duration': 7337,
 900                     'upload_date': '20150721',
 901                     'uploader': 'Beer Games Beer',
 902                     'uploader_id': 'beergamesbeer',
 903                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 904                     'license': 'Standard YouTube License',
 905                 },
 906             }, {
 907                 'info_dict': {
 908                     'id': 'PUOgX5z9xZw',
 909                     'ext': 'mp4',
 910                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 911                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 912                     'duration': 7337,
 913                     'upload_date': '20150721',
 914                     'uploader': 'Beer Games Beer',
 915                     'uploader_id': 'beergamesbeer',
 916                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 917                     'license': 'Standard YouTube License',
 918                 },
 919             }, {
 920                 'info_dict': {
 921                     'id': 'teuwxikvS5k',
 922                     'ext': 'mp4',
 923                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 924                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 925                     'duration': 7334,
 926                     'upload_date': '20150721',
 927                     'uploader': 'Beer Games Beer',
 928                     'uploader_id': 'beergamesbeer',
 929                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 930                     'license': 'Standard YouTube License',
 931                 },
 932             }],
 933             'params': {
 934                 'skip_download': True,
 935             },
 936             'skip': 'This video is not available.',
 937         },
 938         {
 939             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 940             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 941             'info_dict': {
 942                 'id': 'gVfLd0zydlo',
 943                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 944             },
 945             'playlist_count': 2,
 946             'skip': 'Not multifeed anymore',
 947         },
 948         {
 949             'url': 'https://vid.plus/FlRa-iH7PGw',
 950             'only_matching': True,
 951         },
 952         {
 953             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 954             'only_matching': True,
 955         },
 956         {
 957             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 958             # Also tests cut-off URL expansion in video description (see
 959             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 960             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 961             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 962             'info_dict': {
 963                 'id': 'lsguqyKfVQg',
 964                 'ext': 'mp4',
 965                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 966                 'alt_title': 'Dark Walk - Position Music',
 967                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 968                 'duration': 133,
 969                 'upload_date': '20151119',
 970                 'uploader_id': 'IronSoulElf',
 971                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 972                 'uploader': 'IronSoulElf',
 973                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 974                 'track': 'Dark Walk - Position Music',
 975                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 976                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
 977             },
 978             'params': {
 979                 'skip_download': True,
 980             },
 981         },
 982         {
 983             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 984             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 985             'only_matching': True,
 986         },
 987         {
 988             # Video with yt:stretch=17:0
 989             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 990             'info_dict': {
 991                 'id': 'Q39EVAstoRM',
 992                 'ext': 'mp4',
 993                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 994                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 995                 'upload_date': '20151107',
 996                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 997                 'uploader': 'CH GAMER DROID',
 998             },
 999             'params': {
1000                 'skip_download': True,
1001             },
1002             'skip': 'This video does not exist.',
1003         },
1004         {
1005             # Video licensed under Creative Commons
1006             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1007             'info_dict': {
1008                 'id': 'M4gD1WSo5mA',
1009                 'ext': 'mp4',
1010                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1011                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1012                 'duration': 721,
1013                 'upload_date': '20150127',
1014                 'uploader_id': 'BerkmanCenter',
1015                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1016                 'uploader': 'The Berkman Klein Center for Internet & Society',
1017                 'license': 'Creative Commons Attribution license (reuse allowed)',
1018             },
1019             'params': {
1020                 'skip_download': True,
1021             },
1022         },
1023         {
1024             # Channel-like uploader_url
1025             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1026             'info_dict': {
1027                 'id': 'eQcmzGIKrzg',
1028                 'ext': 'mp4',
1029                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1030                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1031                 'duration': 4060,
1032                 'upload_date': '20151119',
1033                 'uploader': 'Bernie Sanders',
1034                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1035                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1036                 'license': 'Creative Commons Attribution license (reuse allowed)',
1037             },
1038             'params': {
1039                 'skip_download': True,
1040             },
1041         },
1042         {
1043             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1044             'only_matching': True,
1045         },
1046         {
1047             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1048             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1049             'only_matching': True,
1050         },
1051         {
1052             # Rental video preview
1053             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1054             'info_dict': {
1055                 'id': 'uGpuVWrhIzE',
1056                 'ext': 'mp4',
1057                 'title': 'Piku - Trailer',
1058                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1059                 'upload_date': '20150811',
1060                 'uploader': 'FlixMatrix',
1061                 'uploader_id': 'FlixMatrixKaravan',
1062                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1063                 'license': 'Standard YouTube License',
1064             },
1065             'params': {
1066                 'skip_download': True,
1067             },
1068             'skip': 'This video is not available.',
1069         },
1070         {
1071             # YouTube Red video with episode data
1072             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1073             'info_dict': {
1074                 'id': 'iqKdEhx-dD4',
1075                 'ext': 'mp4',
1076                 'title': 'Isolation - Mind Field (Ep 1)',
1077                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1078                 'duration': 2085,
1079                 'upload_date': '20170118',
1080                 'uploader': 'Vsauce',
1081                 'uploader_id': 'Vsauce',
1082                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1083                 'series': 'Mind Field',
1084                 'season_number': 1,
1085                 'episode_number': 1,
1086             },
1087             'params': {
1088                 'skip_download': True,
1089             },
1090             'expected_warnings': [
1091                 'Skipping DASH manifest',
1092             ],
1093         },
1094         {
1095             # The following content has been identified by the YouTube community
1096             # as inappropriate or offensive to some audiences.
1097             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1098             'info_dict': {
1099                 'id': '6SJNVb0GnPI',
1100                 'ext': 'mp4',
1101                 'title': 'Race Differences in Intelligence',
1102                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1103                 'duration': 965,
1104                 'upload_date': '20140124',
1105                 'uploader': 'New Century Foundation',
1106                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1107                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1108             },
1109             'params': {
1110                 'skip_download': True,
1111             },
1112         },
1113         {
1114             # itag 212
1115             'url': '1t24XAntNCY',
1116             'only_matching': True,
1117         },
1118         {
1119             # geo restricted to JP
1120             'url': 'sJL6WA-aGkQ',
1121             'only_matching': True,
1122         },
1123         {
1124             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1125             'only_matching': True,
1126         },
1127         {
1128             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1129             'only_matching': True,
1130         },
1131         {
1132             # DRM protected
1133             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1134             'only_matching': True,
1135         },
1136         {
1137             # Video with unsupported adaptive stream type formats
1138             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1139             'info_dict': {
1140                 'id': 'Z4Vy8R84T1U',
1141                 'ext': 'mp4',
1142                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1143                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1144                 'duration': 433,
1145                 'upload_date': '20130923',
1146                 'uploader': 'Amelia Putri Harwita',
1147                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1148                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1149                 'formats': 'maxcount:10',
1150             },
1151             'params': {
1152                 'skip_download': True,
1153                 'youtube_include_dash_manifest': False,
1154             },
1155             'skip': 'not actual anymore',
1156         },
1157         {
1158             # Youtube Music Auto-generated description
1159             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1160             'info_dict': {
1161                 'id': 'MgNrAu2pzNs',
1162                 'ext': 'mp4',
1163                 'title': 'Voyeur Girl',
1164                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1165                 'upload_date': '20190312',
1166                 'uploader': 'Stephen - Topic',
1167                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1168                 'artist': 'Stephen',
1169                 'track': 'Voyeur Girl',
1170                 'album': 'it\'s too much love to know my dear',
1171                 'release_date': '20190313',
1172                 'release_year': 2019,
1173             },
1174             'params': {
1175                 'skip_download': True,
1176             },
1177         },
1178         {
1179             # Youtube Music Auto-generated description
1180             # Retrieve 'artist' field from 'Artist:' in video description
1181             # when it is present on youtube music video
1182             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1183             'info_dict': {
1184                 'id': 'k0jLE7tTwjY',
1185                 'ext': 'mp4',
1186                 'title': 'Latch Feat. Sam Smith',
1187                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1188                 'upload_date': '20150110',
1189                 'uploader': 'Various Artists - Topic',
1190                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1191                 'artist': 'Disclosure',
1192                 'track': 'Latch Feat. Sam Smith',
1193                 'album': 'Latch Featuring Sam Smith',
1194                 'release_date': '20121008',
1195                 'release_year': 2012,
1196             },
1197             'params': {
1198                 'skip_download': True,
1199             },
1200         },
1201         {
1202             # Youtube Music Auto-generated description
1203             # handle multiple artists on youtube music video
1204             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1205             'info_dict': {
1206                 'id': '74qn0eJSjpA',
1207                 'ext': 'mp4',
1208                 'title': 'Eastside',
1209                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1210                 'upload_date': '20180710',
1211                 'uploader': 'Benny Blanco - Topic',
1212                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1213                 'artist': 'benny blanco, Halsey, Khalid',
1214                 'track': 'Eastside',
1215                 'album': 'Eastside',
1216                 'release_date': '20180713',
1217                 'release_year': 2018,
1218             },
1219             'params': {
1220                 'skip_download': True,
1221             },
1222         },
1223         {
1224             # Youtube Music Auto-generated description
1225             # handle youtube music video with release_year and no release_date
1226             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1227             'info_dict': {
1228                 'id': '-hcAI0g-f5M',
1229                 'ext': 'mp4',
1230                 'title': 'Put It On Me',
1231                 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1232                 'upload_date': '20180426',
1233                 'uploader': 'Matt Maeson - Topic',
1234                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1235                 'artist': 'Matt Maeson',
1236                 'track': 'Put It On Me',
1237                 'album': 'The Hearse',
1238                 'release_date': None,
1239                 'release_year': 2018,
1240             },
1241             'params': {
1242                 'skip_download': True,
1243             },
1244         },
1245         {
1246             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1247             'only_matching': True,
1248         },
1249         {
1250             # invalid -> valid video id redirection
1251             'url': 'DJztXj2GPfl',
1252             'info_dict': {
1253                 'id': 'DJztXj2GPfk',
1254                 'ext': 'mp4',
1255                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1256                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1257                 'upload_date': '20090125',
1258                 'uploader': 'Prochorowka',
1259                 'uploader_id': 'Prochorowka',
1260                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1261                 'artist': 'Panjabi MC',
1262                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1263                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1264             },
1265             'params': {
1266                 'skip_download': True,
1267             },
1268         },
1269         {
1270             # empty description results in an empty string
1271             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1272             'info_dict': {
1273                 'id': 'x41yOUIvK2k',
1274                 'ext': 'mp4',
1275                 'title': 'IMG 3456',
1276                 'description': '',
1277                 'upload_date': '20170613',
1278                 'uploader_id': 'ElevageOrVert',
1279                 'uploader': 'ElevageOrVert',
1280             },
1281             'params': {
1282                 'skip_download': True,
1283             },
1284         },
1285     ]
1286
1287     def __init__(self, *args, **kwargs):
1288         super(YoutubeIE, self).__init__(*args, **kwargs)
1289         self._player_cache = {}
1290
1291     def report_video_info_webpage_download(self, video_id):
1292         """Report attempt to download video info webpage."""
1293         self.to_screen('%s: Downloading video info webpage' % video_id)
1294
1295     def report_information_extraction(self, video_id):
1296         """Report attempt to extract video information."""
1297         self.to_screen('%s: Extracting video information' % video_id)
1298
1299     def report_unavailable_format(self, video_id, format):
1300         """Report extracted video URL."""
1301         self.to_screen('%s: Format %s not available' % (video_id, format))
1302
1303     def report_rtmp_download(self):
1304         """Indicate the download will use the RTMP protocol."""
1305         self.to_screen('RTMP download detected')
1306
1307     def _signature_cache_id(self, example_sig):
1308         """ Return a string representation of a signature """
1309         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1310
1311     @classmethod
1312     def _extract_player_info(cls, player_url):
1313         for player_re in cls._PLAYER_INFO_RE:
1314             id_m = re.search(player_re, player_url)
1315             if id_m:
1316                 break
1317         else:
1318             raise ExtractorError('Cannot identify player %r' % player_url)
1319         return id_m.group('ext'), id_m.group('id')
1320
1321     def _extract_signature_function(self, video_id, player_url, example_sig):
1322         player_type, player_id = self._extract_player_info(player_url)
1323
1324         # Read from filesystem cache
1325         func_id = '%s_%s_%s' % (
1326             player_type, player_id, self._signature_cache_id(example_sig))
1327         assert os.path.basename(func_id) == func_id
1328
1329         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1330         if cache_spec is not None:
1331             return lambda s: ''.join(s[i] for i in cache_spec)
1332
1333         download_note = (
1334             'Downloading player %s' % player_url
1335             if self._downloader.params.get('verbose') else
1336             'Downloading %s player %s' % (player_type, player_id)
1337         )
1338         if player_type == 'js':
1339             code = self._download_webpage(
1340                 player_url, video_id,
1341                 note=download_note,
1342                 errnote='Download of %s failed' % player_url)
1343             res = self._parse_sig_js(code)
1344         elif player_type == 'swf':
1345             urlh = self._request_webpage(
1346                 player_url, video_id,
1347                 note=download_note,
1348                 errnote='Download of %s failed' % player_url)
1349             code = urlh.read()
1350             res = self._parse_sig_swf(code)
1351         else:
1352             assert False, 'Invalid player type %r' % player_type
1353
1354         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1355         cache_res = res(test_string)
1356         cache_spec = [ord(c) for c in cache_res]
1357
1358         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1359         return res
1360
    def _print_sig_code(self, func, example_sig):
        """Print Python code equivalent to the signature function func.

        func is run on a string of len(example_sig) distinct characters;
        the code points of its output give, for every output position, the
        index of the input character that ends up there. This index list is
        rendered as a sum of index and slice expressions on s, guarded by a
        check of the lengths of the dot-separated parts of the signature,
        and shown through to_screen().
        """
        def gen_sig_code(idxs):
            # Yields the 's[...]' expressions covering idxs in order; runs
            # of indices changing by +1 or -1 are folded into one slice
            def _genslice(start, end, step):
                # Render the run going from start to end (both included)
                starts = '' if start == 0 else str(start)
                # The stop is one step past end; when that would be negative
                # the stop is left open instead
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # step is None while no run is in progress
            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk over the pairs of neighbouring indices (prev, i)
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: keep going while the stride holds
                    if i - prev == step:
                        continue
                    # The run ended at prev; emit it and start afresh
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new run begins at prev
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever is still pending for the last index
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe func with distinct characters to learn its permutation
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1399
1400     def _parse_sig_js(self, jscode):
1401         funcname = self._search_regex(
1402             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1403              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1404              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1405              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1406              # Obsolete patterns
1407              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1408              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1409              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1410              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1411              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1412              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1413              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1414              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1415             jscode, 'Initial JS player signature function name', group='sig')
1416
1417         jsi = JSInterpreter(jscode)
1418         initial_function = jsi.extract_function(funcname)
1419         return lambda s: initial_function([s])
1420
1421     def _parse_sig_swf(self, file_contents):
1422         swfi = SWFInterpreter(file_contents)
1423         TARGET_CLASSNAME = 'SignatureDecipher'
1424         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1425         initial_function = swfi.extract_function(searched_class, 'decipher')
1426         return lambda s: initial_function([s])
1427
1428     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1429         """Turn the encrypted s field into a working signature"""
1430
1431         if player_url is None:
1432             raise ExtractorError('Cannot decrypt signature without player_url')
1433
1434         if player_url.startswith('//'):
1435             player_url = 'https:' + player_url
1436         elif not re.match(r'https?://', player_url):
1437             player_url = compat_urlparse.urljoin(
1438                 'https://www.youtube.com', player_url)
1439         try:
1440             player_id = (player_url, self._signature_cache_id(s))
1441             if player_id not in self._player_cache:
1442                 func = self._extract_signature_function(
1443                     video_id, player_url, s
1444                 )
1445                 self._player_cache[player_id] = func
1446             func = self._player_cache[player_id]
1447             if self._downloader.params.get('youtube_print_sig_code'):
1448                 self._print_sig_code(func, s)
1449             return func(s)
1450         except Exception as e:
1451             tb = traceback.format_exc()
1452             raise ExtractorError(
1453                 'Signature extraction failed: ' + tb, cause=e)
1454
1455     def _get_subtitles(self, video_id, webpage):
1456         try:
1457             subs_doc = self._download_xml(
1458                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1459                 video_id, note=False)
1460         except ExtractorError as err:
1461             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1462             return {}
1463
1464         sub_lang_list = {}
1465         for track in subs_doc.findall('track'):
1466             lang = track.attrib['lang_code']
1467             if lang in sub_lang_list:
1468                 continue
1469             sub_formats = []
1470             for ext in self._SUBTITLE_FORMATS:
1471                 params = compat_urllib_parse_urlencode({
1472                     'lang': lang,
1473                     'v': video_id,
1474                     'fmt': ext,
1475                     'name': track.attrib['name'].encode('utf-8'),
1476                 })
1477                 sub_formats.append({
1478                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1479                     'ext': ext,
1480                 })
1481             sub_lang_list[lang] = sub_formats
1482         if not sub_lang_list:
1483             self._downloader.report_warning('video doesn\'t have subtitles')
1484             return {}
1485         return sub_lang_list
1486
1487     def _get_ytplayer_config(self, video_id, webpage):
1488         patterns = (
1489             # User data may contain arbitrary character sequences that may affect
1490             # JSON extraction with regex, e.g. when '};' is contained the second
1491             # regex won't capture the whole JSON. Yet working around by trying more
1492             # concrete regex first keeping in mind proper quoted string handling
1493             # to be implemented in future that will replace this workaround (see
1494             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1495             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1496             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1497             r';ytplayer\.config\s*=\s*({.+?});',
1498         )
1499         config = self._search_regex(
1500             patterns, webpage, 'ytplayer.config', default=None)
1501         if config:
1502             return self._parse_json(
1503                 uppercase_escape(config), video_id, fatal=False)
1504
    def _get_automatic_captions(self, video_id, webpage):
        """Return the automatic captions as {lang: [format dicts]}.

        The webpage is needed to get the captions URL; it is passed as an
        argument to speed up the process. The player config found in it is
        examined in three ways, in this order: the 'ttsurl' argument, the
        captions section of 'player_response', and finally the
        'caption_tracks' / 'caption_translation_languages' arguments.

        A warning is issued and an empty dict returned when no player
        config is available or the captions cannot be determined.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The first track gives the language of the original captions
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One entry per target language, each in every known format
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build the captions dict from a base URL by overriding its
                # 'tlang' and 'fmt' query parameters for every language and
                # format combination
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    # The first caption track serves as the base URL
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there
        # are no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1606
1607     def _mark_watched(self, video_id, video_info, player_response):
1608         playback_url = url_or_none(try_get(
1609             player_response,
1610             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1611             video_info, lambda x: x['videostats_playback_base_url'][0]))
1612         if not playback_url:
1613             return
1614         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1615         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1616
1617         # cpn generation algorithm is reverse engineered from base.js.
1618         # In fact it works even with dummy cpn.
1619         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1620         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1621
1622         qs.update({
1623             'ver': ['2'],
1624             'cpn': [cpn],
1625         })
1626         playback_url = compat_urlparse.urlunparse(
1627             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1628
1629         self._download_webpage(
1630             playback_url, video_id, 'Marking watched',
1631             'Unable to mark watched', fatal=False)
1632
1633     @staticmethod
1634     def _extract_urls(webpage):
1635         # Embedded YouTube player
1636         entries = [
1637             unescapeHTML(mobj.group('url'))
1638             for mobj in re.finditer(r'''(?x)
1639             (?:
1640                 <iframe[^>]+?src=|
1641                 data-video-url=|
1642                 <embed[^>]+?src=|
1643                 embedSWF\(?:\s*|
1644                 <object[^>]+data=|
1645                 new\s+SWFObject\(
1646             )
1647             (["\'])
1648                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1649                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1650             \1''', webpage)]
1651
1652         # lazyYT YouTube embed
1653         entries.extend(list(map(
1654             unescapeHTML,
1655             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1656
1657         # Wordpress "YouTube Video Importer" plugin
1658         matches = re.findall(r'''(?x)<div[^>]+
1659             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1660             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1661         entries.extend(m[-1] for m in matches)
1662
1663         return entries
1664
1665     @staticmethod
1666     def _extract_url(webpage):
1667         urls = YoutubeIE._extract_urls(webpage)
1668         return urls[0] if urls else None
1669
1670     @classmethod
1671     def extract_id(cls, url):
1672         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1673         if mobj is None:
1674             raise ExtractorError('Invalid URL: %s' % url)
1675         video_id = mobj.group(2)
1676         return video_id
1677
1678     def _extract_chapters_from_json(self, webpage, video_id, duration):
1679         if not webpage:
1680             return
1681         player = self._parse_json(
1682             self._search_regex(
1683                 r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
1684                 'player args', default='{}'),
1685             video_id, fatal=False)
1686         if not player or not isinstance(player, dict):
1687             return
1688         watch_next_response = player.get('watch_next_response')
1689         if not isinstance(watch_next_response, compat_str):
1690             return
1691         response = self._parse_json(watch_next_response, video_id, fatal=False)
1692         if not response or not isinstance(response, dict):
1693             return
1694         chapters_list = try_get(
1695             response,
1696             lambda x: x['playerOverlays']
1697                        ['playerOverlayRenderer']
1698                        ['decoratedPlayerBarRenderer']
1699                        ['decoratedPlayerBarRenderer']
1700                        ['playerBar']
1701                        ['chapteredPlayerBarRenderer']
1702                        ['chapters'],
1703             list)
1704         if not chapters_list:
1705             return
1706
1707         def chapter_time(chapter):
1708             return float_or_none(
1709                 try_get(
1710                     chapter,
1711                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1712                     int),
1713                 scale=1000)
1714         chapters = []
1715         for next_num, chapter in enumerate(chapters_list, start=1):
1716             start_time = chapter_time(chapter)
1717             if start_time is None:
1718                 continue
1719             end_time = (chapter_time(chapters_list[next_num])
1720                         if next_num < len(chapters_list) else duration)
1721             if end_time is None:
1722                 continue
1723             title = try_get(
1724                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1725                 compat_str)
1726             chapters.append({
1727                 'start_time': start_time,
1728                 'end_time': end_time,
1729                 'title': title,
1730             })
1731         return chapters
1732
1733     @staticmethod
1734     def _extract_chapters_from_description(description, duration):
1735         if not description:
1736             return None
1737         chapter_lines = re.findall(
1738             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1739             description)
1740         if not chapter_lines:
1741             return None
1742         chapters = []
1743         for next_num, (chapter_line, time_point) in enumerate(
1744                 chapter_lines, start=1):
1745             start_time = parse_duration(time_point)
1746             if start_time is None:
1747                 continue
1748             if start_time > duration:
1749                 break
1750             end_time = (duration if next_num == len(chapter_lines)
1751                         else parse_duration(chapter_lines[next_num][1]))
1752             if end_time is None:
1753                 continue
1754             if end_time > duration:
1755                 end_time = duration
1756             if start_time > end_time:
1757                 break
1758             chapter_title = re.sub(
1759                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1760             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1761             chapters.append({
1762                 'start_time': start_time,
1763                 'end_time': end_time,
1764                 'title': chapter_title,
1765             })
1766         return chapters
1767
1768     def _extract_chapters(self, webpage, description, video_id, duration):
1769         return (self._extract_chapters_from_json(webpage, video_id, duration)
1770                 or self._extract_chapters_from_description(description, duration))
1771
1772     def _real_extract(self, url):
1773         url, smuggled_data = unsmuggle_url(url, {})
1774
1775         proto = (
1776             'http' if self._downloader.params.get('prefer_insecure', False)
1777             else 'https')
1778
1779         start_time = None
1780         end_time = None
1781         parsed_url = compat_urllib_parse_urlparse(url)
1782         for component in [parsed_url.fragment, parsed_url.query]:
1783             query = compat_parse_qs(component)
1784             if start_time is None and 't' in query:
1785                 start_time = parse_duration(query['t'][0])
1786             if start_time is None and 'start' in query:
1787                 start_time = parse_duration(query['start'][0])
1788             if end_time is None and 'end' in query:
1789                 end_time = parse_duration(query['end'][0])
1790
1791         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1792         mobj = re.search(self._NEXT_URL_RE, url)
1793         if mobj:
1794             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1795         video_id = self.extract_id(url)
1796
1797         # Get video webpage
1798         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1799         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1800
1801         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1802         video_id = qs.get('v', [None])[0] or video_id
1803
1804         # Attempt to extract SWF player URL
1805         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1806         if mobj is not None:
1807             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1808         else:
1809             player_url = None
1810
1811         dash_mpds = []
1812
1813         def add_dash_mpd(video_info):
1814             dash_mpd = video_info.get('dashmpd')
1815             if dash_mpd and dash_mpd[0] not in dash_mpds:
1816                 dash_mpds.append(dash_mpd[0])
1817
1818         def add_dash_mpd_pr(pl_response):
1819             dash_mpd = url_or_none(try_get(
1820                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1821                 compat_str))
1822             if dash_mpd and dash_mpd not in dash_mpds:
1823                 dash_mpds.append(dash_mpd)
1824
1825         is_live = None
1826         view_count = None
1827
1828         def extract_view_count(v_info):
1829             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1830
1831         def extract_player_response(player_response, video_id):
1832             pl_response = str_or_none(player_response)
1833             if not pl_response:
1834                 return
1835             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1836             if isinstance(pl_response, dict):
1837                 add_dash_mpd_pr(pl_response)
1838                 return pl_response
1839
1840         player_response = {}
1841
1842         # Get video info
1843         video_info = {}
1844         embed_webpage = None
1845         if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1846                 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1847             age_gate = True
1848             # We simulate the access to the video from www.youtube.com/v/{video_id}
1849             # this can be viewed without login into Youtube
1850             url = proto + '://www.youtube.com/embed/%s' % video_id
1851             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1852             data = compat_urllib_parse_urlencode({
1853                 'video_id': video_id,
1854                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1855                 'sts': self._search_regex(
1856                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1857             })
1858             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1859             try:
1860                 video_info_webpage = self._download_webpage(
1861                     video_info_url, video_id,
1862                     note='Refetching age-gated info webpage',
1863                     errnote='unable to download video info webpage')
1864             except ExtractorError:
1865                 video_info_webpage = None
1866             if video_info_webpage:
1867                 video_info = compat_parse_qs(video_info_webpage)
1868                 pl_response = video_info.get('player_response', [None])[0]
1869                 player_response = extract_player_response(pl_response, video_id)
1870                 add_dash_mpd(video_info)
1871                 view_count = extract_view_count(video_info)
1872         else:
1873             age_gate = False
1874             # Try looking directly into the video webpage
1875             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1876             if ytplayer_config:
1877                 args = ytplayer_config['args']
1878                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1879                     # Convert to the same format returned by compat_parse_qs
1880                     video_info = dict((k, [v]) for k, v in args.items())
1881                     add_dash_mpd(video_info)
1882                 # Rental video is not rented but preview is available (e.g.
1883                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1884                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1885                 if not video_info and args.get('ypc_vid'):
1886                     return self.url_result(
1887                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1888                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1889                     is_live = True
1890                 if not player_response:
1891                     player_response = extract_player_response(args.get('player_response'), video_id)
1892             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1893                 add_dash_mpd_pr(player_response)
1894
1895         def extract_unavailable_message():
                 # Scrape the "video unavailable" notice out of video_webpage: the
                 # contents of <h1 id="unavailable-message"> and of
                 # <div id="unavailable-submessage">, looked up in that order.
1896             messages = []
1897             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1898                 msg = self._html_search_regex(
1899                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1900                     video_webpage, 'unavailable %s' % kind, default=None)
                     # Only non-empty results are kept.
1901                 if msg:
1902                     messages.append(msg)
                 # Collected parts are joined with a newline. With no parts the
                 # function falls off the end, so the caller receives None (the
                 # call sites test the result for truthiness).
1903             if messages:
1904                 return '\n'.join(messages)
1905
1906         if not video_info and not player_response:
1907             unavailable_message = extract_unavailable_message()
1908             if not unavailable_message:
1909                 unavailable_message = 'Unable to extract video data'
1910             raise ExtractorError(
1911                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1912
1913         if not isinstance(video_info, dict):
1914             video_info = {}
1915
1916         video_details = try_get(
1917             player_response, lambda x: x['videoDetails'], dict) or {}
1918
1919         microformat = try_get(
1920             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1921
1922         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1923         if not video_title:
1924             self._downloader.report_warning('Unable to extract video title')
1925             video_title = '_'
1926
1927         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1928         if video_description:
1929
1930             def replace_url(m):
                     # re.sub() callback for the description rewrite that follows:
                     # m.group(1) is the captured title/href attribute value of an <a>
                     # whose visible text ends in "...". The whole <a> element is
                     # replaced by the URL returned here.
                     # The captured value is resolved against the page URL first.
1931                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1932                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
                     # A link to the /redirect path on youtube.com,
                     # youtube-nocookie.com or youtu.be (optionally with "www.") is
                     # unwrapped: the first non-empty value of its `q` query parameter
                     # is returned instead (presumably the real destination).
1933                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1934                     qs = compat_parse_qs(parsed_redir_url.query)
1935                     q = qs.get('q')
1936                     if q and q[0]:
1937                         return q[0]
                     # Anything else is returned as the resolved absolute URL.
1938                 return redir_url
1939
1940             description_original = video_description = re.sub(r'''(?x)
1941                 <a\s+
1942                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1943                     (?:title|href)="([^"]+)"\s+
1944                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1945                     class="[^"]*"[^>]*>
1946                 [^<]+\.{3}\s*
1947                 </a>
1948             ''', replace_url, video_description)
1949             video_description = clean_html(video_description)
1950         else:
1951             video_description = video_details.get('shortDescription')
1952             if video_description is None:
1953                 video_description = self._html_search_meta('description', video_webpage)
1954
1955         if not smuggled_data.get('force_singlefeed', False):
1956             if not self._downloader.params.get('noplaylist'):
1957                 multifeed_metadata_list = try_get(
1958                     player_response,
1959                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1960                     compat_str) or try_get(
1961                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1962                 if multifeed_metadata_list:
1963                     entries = []
1964                     feed_ids = []
1965                     for feed in multifeed_metadata_list.split(','):
1966                         # Unquote should take place before split on comma (,) since textual
1967                         # fields may contain comma as well (see
1968                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1969                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1970
1971                         def feed_entry(name):
                                 # First value stored under `name` in the parsed feed_data
                                 # query string, requested as compat_str through try_get
                                 # (presumably None when the key is missing or the value
                                 # is not a string -- the caller skips feeds without an id).
1972                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1973
1974                         feed_id = feed_entry('id')
1975                         if not feed_id:
1976                             continue
1977                         feed_title = feed_entry('title')
1978                         title = video_title
1979                         if feed_title:
1980                             title += ' (%s)' % feed_title
1981                         entries.append({
1982                             '_type': 'url_transparent',
1983                             'ie_key': 'Youtube',
1984                             'url': smuggle_url(
1985                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1986                                 {'force_singlefeed': True}),
1987                             'title': title,
1988                         })
1989                         feed_ids.append(feed_id)
1990                     self.to_screen(
1991                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1992                         % (', '.join(feed_ids), video_id))
1993                     return self.playlist_result(entries, video_id, video_title, video_description)
1994             else:
1995                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1996
1997         if view_count is None:
1998             view_count = extract_view_count(video_info)
1999         if view_count is None and video_details:
2000             view_count = int_or_none(video_details.get('viewCount'))
2001         if view_count is None and microformat:
2002             view_count = int_or_none(microformat.get('viewCount'))
2003
2004         if is_live is None:
2005             is_live = bool_or_none(video_details.get('isLive'))
2006
2007         # Check for "rental" videos
2008         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2009             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2010
2011         def _extract_filesize(media_url):
                 # Read the content length embedded in a media URL: the digits that
                 # follow "clen=" or "clen/". The match (or None, via default=None,
                 # when the URL has no such part) is passed through int_or_none.
2012             return int_or_none(self._search_regex(
2013                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2014
2015         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2016         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2017
2018         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2019             self.report_rtmp_download()
2020             formats = [{
2021                 'format_id': '_rtmp',
2022                 'protocol': 'rtmp',
2023                 'url': video_info['conn'][0],
2024                 'player_url': player_url,
2025             }]
2026         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2027             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2028             if 'rtmpe%3Dyes' in encoded_url_map:
2029                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2030             formats = []
2031             formats_spec = {}
2032             fmt_list = video_info.get('fmt_list', [''])[0]
2033             if fmt_list:
2034                 for fmt in fmt_list.split(','):
2035                     spec = fmt.split('/')
2036                     if len(spec) > 1:
2037                         width_height = spec[1].split('x')
2038                         if len(width_height) == 2:
2039                             formats_spec[spec[0]] = {
2040                                 'resolution': spec[1],
2041                                 'width': int_or_none(width_height[0]),
2042                                 'height': int_or_none(width_height[1]),
2043                             }
2044             for fmt in streaming_formats:
2045                 itag = str_or_none(fmt.get('itag'))
2046                 if not itag:
2047                     continue
2048                 quality = fmt.get('quality')
2049                 quality_label = fmt.get('qualityLabel') or quality
2050                 formats_spec[itag] = {
2051                     'asr': int_or_none(fmt.get('audioSampleRate')),
2052                     'filesize': int_or_none(fmt.get('contentLength')),
2053                     'format_note': quality_label,
2054                     'fps': int_or_none(fmt.get('fps')),
2055                     'height': int_or_none(fmt.get('height')),
2056                     # bitrate for itag 43 is always 2147483647
2057                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2058                     'width': int_or_none(fmt.get('width')),
2059                 }
2060
2061             for fmt in streaming_formats:
2062                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2063                     continue
2064                 url = url_or_none(fmt.get('url'))
2065
2066                 if not url:
2067                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2068                     if not cipher:
2069                         continue
2070                     url_data = compat_parse_qs(cipher)
2071                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2072                     if not url:
2073                         continue
2074                 else:
2075                     cipher = None
2076                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2077
2078                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2079                 # Unsupported FORMAT_STREAM_TYPE_OTF
2080                 if stream_type == 3:
2081                     continue
2082
2083                 format_id = fmt.get('itag') or url_data['itag'][0]
2084                 if not format_id:
2085                     continue
2086                 format_id = compat_str(format_id)
2087
2088                 if cipher:
2089                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2090                         ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2091                         jsplayer_url_json = self._search_regex(
2092                             ASSETS_RE,
2093                             embed_webpage if age_gate else video_webpage,
2094                             'JS player URL (1)', default=None)
2095                         if not jsplayer_url_json and not age_gate:
2096                             # We need the embed website after all
2097                             if embed_webpage is None:
2098                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2099                                 embed_webpage = self._download_webpage(
2100                                     embed_url, video_id, 'Downloading embed webpage')
2101                             jsplayer_url_json = self._search_regex(
2102                                 ASSETS_RE, embed_webpage, 'JS player URL')
2103
2104                         player_url = json.loads(jsplayer_url_json)
2105                         if player_url is None:
2106                             player_url_json = self._search_regex(
2107                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2108                                 video_webpage, 'age gate player URL')
2109                             player_url = json.loads(player_url_json)
2110
2111                     if 'sig' in url_data:
2112                         url += '&signature=' + url_data['sig'][0]
2113                     elif 's' in url_data:
2114                         encrypted_sig = url_data['s'][0]
2115
2116                         if self._downloader.params.get('verbose'):
2117                             if player_url is None:
2118                                 player_desc = 'unknown'
2119                             else:
2120                                 player_type, player_version = self._extract_player_info(player_url)
2121                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2122                             parts_sizes = self._signature_cache_id(encrypted_sig)
2123                             self.to_screen('{%s} signature length %s, %s' %
2124                                            (format_id, parts_sizes, player_desc))
2125
2126                         signature = self._decrypt_signature(
2127                             encrypted_sig, video_id, player_url, age_gate)
2128                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2129                         url += '&%s=%s' % (sp, signature)
2130                 if 'ratebypass' not in url:
2131                     url += '&ratebypass=yes'
2132
2133                 dct = {
2134                     'format_id': format_id,
2135                     'url': url,
2136                     'player_url': player_url,
2137                 }
2138                 if format_id in self._formats:
2139                     dct.update(self._formats[format_id])
2140                 if format_id in formats_spec:
2141                     dct.update(formats_spec[format_id])
2142
2143                 # Some itags are not included in DASH manifest thus corresponding formats will
2144                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2145                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2146                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2147                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2148
2149                 if width is None:
2150                     width = int_or_none(fmt.get('width'))
2151                 if height is None:
2152                     height = int_or_none(fmt.get('height'))
2153
2154                 filesize = int_or_none(url_data.get(
2155                     'clen', [None])[0]) or _extract_filesize(url)
2156
2157                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2158                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2159
2160                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2161                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2162                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2163
2164                 more_fields = {
2165                     'filesize': filesize,
2166                     'tbr': tbr,
2167                     'width': width,
2168                     'height': height,
2169                     'fps': fps,
2170                     'format_note': quality_label or quality,
2171                 }
2172                 for key, value in more_fields.items():
2173                     if value:
2174                         dct[key] = value
2175                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2176                 if type_:
2177                     type_split = type_.split(';')
2178                     kind_ext = type_split[0].split('/')
2179                     if len(kind_ext) == 2:
2180                         kind, _ = kind_ext
2181                         dct['ext'] = mimetype2ext(type_split[0])
2182                         if kind in ('audio', 'video'):
2183                             codecs = None
2184                             for mobj in re.finditer(
2185                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2186                                 if mobj.group('key') == 'codecs':
2187                                     codecs = mobj.group('val')
2188                                     break
2189                             if codecs:
2190                                 dct.update(parse_codecs(codecs))
2191                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2192                     dct['downloader_options'] = {
2193                         # Youtube throttles chunks >~10M
2194                         'http_chunk_size': 10485760,
2195                     }
2196                 formats.append(dct)
2197         else:
2198             manifest_url = (
2199                 url_or_none(try_get(
2200                     player_response,
2201                     lambda x: x['streamingData']['hlsManifestUrl'],
2202                     compat_str))
2203                 or url_or_none(try_get(
2204                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2205             if manifest_url:
2206                 formats = []
2207                 m3u8_formats = self._extract_m3u8_formats(
2208                     manifest_url, video_id, 'mp4', fatal=False)
2209                 for a_format in m3u8_formats:
2210                     itag = self._search_regex(
2211                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2212                     if itag:
2213                         a_format['format_id'] = itag
2214                         if itag in self._formats:
2215                             dct = self._formats[itag].copy()
2216                             dct.update(a_format)
2217                             a_format = dct
2218                     a_format['player_url'] = player_url
2219                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2220                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2221                     formats.append(a_format)
2222             else:
2223                 error_message = extract_unavailable_message()
2224                 if not error_message:
2225                     error_message = clean_html(try_get(
2226                         player_response, lambda x: x['playabilityStatus']['reason'],
2227                         compat_str))
2228                 if not error_message:
2229                     error_message = clean_html(
2230                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2231                 if error_message:
2232                     raise ExtractorError(error_message, expected=True)
2233                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2234
2235         # uploader
2236         video_uploader = try_get(
2237             video_info, lambda x: x['author'][0],
2238             compat_str) or str_or_none(video_details.get('author'))
2239         if video_uploader:
2240             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2241         else:
2242             self._downloader.report_warning('unable to extract uploader name')
2243
2244         # uploader_id
2245         video_uploader_id = None
2246         video_uploader_url = None
2247         mobj = re.search(
2248             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2249             video_webpage)
2250         if mobj is not None:
2251             video_uploader_id = mobj.group('uploader_id')
2252             video_uploader_url = mobj.group('uploader_url')
2253         else:
2254             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2255             if owner_profile_url:
2256                 video_uploader_id = self._search_regex(
2257                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2258                     default=None)
2259                 video_uploader_url = owner_profile_url
2260
2261         channel_id = (
2262             str_or_none(video_details.get('channelId'))
2263             or self._html_search_meta(
2264                 'channelId', video_webpage, 'channel id', default=None)
2265             or self._search_regex(
2266                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2267                 video_webpage, 'channel id', default=None, group='id'))
2268         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2269
2270         thumbnails = []
2271         thumbnails_list = try_get(
2272             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2273         for t in thumbnails_list:
2274             if not isinstance(t, dict):
2275                 continue
2276             thumbnail_url = url_or_none(t.get('url'))
2277             if not thumbnail_url:
2278                 continue
2279             thumbnails.append({
2280                 'url': thumbnail_url,
2281                 'width': int_or_none(t.get('width')),
2282                 'height': int_or_none(t.get('height')),
2283             })
2284
2285         if not thumbnails:
2286             video_thumbnail = None
2287             # We try first to get a high quality image:
2288             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2289                                 video_webpage, re.DOTALL)
2290             if m_thumb is not None:
2291                 video_thumbnail = m_thumb.group(1)
2292             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2293             if thumbnail_url:
2294                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2295             if video_thumbnail:
2296                 thumbnails.append({'url': video_thumbnail})
2297
2298         # upload date
2299         upload_date = self._html_search_meta(
2300             'datePublished', video_webpage, 'upload date', default=None)
2301         if not upload_date:
2302             upload_date = self._search_regex(
2303                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2304                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2305                 video_webpage, 'upload date', default=None)
2306         if not upload_date:
2307             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2308         upload_date = unified_strdate(upload_date)
2309
2310         video_license = self._html_search_regex(
2311             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2312             video_webpage, 'license', default=None)
2313
2314         m_music = re.search(
2315             r'''(?x)
2316                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2317                 <ul[^>]*>\s*
2318                 <li>(?P<title>.+?)
2319                 by (?P<creator>.+?)
2320                 (?:
2321                     \(.+?\)|
2322                     <a[^>]*
2323                         (?:
2324                             \bhref=["\']/red[^>]*>|             # drop possible
2325                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2326                         )
2327                     .*?
2328                 )?</li
2329             ''',
2330             video_webpage)
2331         if m_music:
2332             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2333             video_creator = clean_html(m_music.group('creator'))
2334         else:
2335             video_alt_title = video_creator = None
2336
2337         def extract_meta(field):
                 # Look in video_webpage for an <h4 class="title"> heading whose text
                 # is `field` and return the contents of the first <li> of the <ul>
                 # that follows it; default=None is passed so a missing section is
                 # not treated as an error.
                 # NOTE: `field` is interpolated into the pattern without escaping,
                 # so it must not contain regex metacharacters.
2338             return self._html_search_regex(
2339                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2340                 video_webpage, field, default=None)
2341
2342         track = extract_meta('Song')
2343         artist = extract_meta('Artist')
2344         album = extract_meta('Album')
2345
2346         # Youtube Music Auto-generated description
2347         release_date = release_year = None
2348         if video_description:
2349             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2350             if mobj:
2351                 if not track:
2352                     track = mobj.group('track').strip()
2353                 if not artist:
2354                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2355                 if not album:
2356                     album = mobj.group('album'.strip())
2357                 release_year = mobj.group('release_year')
2358                 release_date = mobj.group('release_date')
2359                 if release_date:
2360                     release_date = release_date.replace('-', '')
2361                     if not release_year:
2362                         release_year = int(release_date[:4])
2363                 if release_year:
2364                     release_year = int(release_year)
2365
2366         m_episode = re.search(
2367             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2368             video_webpage)
2369         if m_episode:
2370             series = unescapeHTML(m_episode.group('series'))
2371             season_number = int(m_episode.group('season'))
2372             episode_number = int(m_episode.group('episode'))
2373         else:
2374             series = season_number = episode_number = None
2375
2376         m_cat_container = self._search_regex(
2377             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2378             video_webpage, 'categories', default=None)
2379         category = None
2380         if m_cat_container:
2381             category = self._html_search_regex(
2382                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2383                 default=None)
2384         if not category:
2385             category = try_get(
2386                 microformat, lambda x: x['category'], compat_str)
2387         video_categories = None if category is None else [category]
2388
2389         video_tags = [
2390             unescapeHTML(m.group('content'))
2391             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2392         if not video_tags:
2393             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2394
2395         def _extract_count(count_name):
                 # Scrape a counter from the watch page markup: after the text
                 # "-<count_name>-button" inside a tag, take the digits-and-commas
                 # content of the following <span class="yt-uix-button-content">.
                 # count_name is regex-escaped before interpolation. The match (or
                 # None, via default=None) is handed to str_to_int, which presumably
                 # drops the separators and passes None through.
2396             return str_to_int(self._search_regex(
2397                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2398                 % re.escape(count_name),
2399                 video_webpage, count_name, default=None))
2400
2401         like_count = _extract_count('like')
2402         dislike_count = _extract_count('dislike')
2403
2404         if view_count is None:
2405             view_count = str_to_int(self._search_regex(
2406                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2407                 'view count', default=None))
2408
2409         average_rating = (
2410             float_or_none(video_details.get('averageRating'))
2411             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2412
2413         # subtitles
2414         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2415         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2416
2417         video_duration = try_get(
2418             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2419         if not video_duration:
2420             video_duration = int_or_none(video_details.get('lengthSeconds'))
2421         if not video_duration:
2422             video_duration = parse_duration(self._html_search_meta(
2423                 'duration', video_webpage, 'video duration'))
2424
2425         # Get Subscriber Count of channel
2426         subscriber_count = parse_count(self._search_regex(
2427             r'"text":"([\d\.]+\w?) subscribers"',
2428             video_webpage,
2429             'subscriber count',
2430             default=None
2431         ))
2432
2433         # annotations
2434         video_annotations = None
2435         if self._downloader.params.get('writeannotations', False):
2436             xsrf_token = self._search_regex(
2437                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2438                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2439             invideo_url = try_get(
2440                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2441             if xsrf_token and invideo_url:
2442                 xsrf_field_name = self._search_regex(
2443                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2444                     video_webpage, 'xsrf field name',
2445                     group='xsrf_field_name', default='session_token')
2446                 video_annotations = self._download_webpage(
2447                     self._proto_relative_url(invideo_url),
2448                     video_id, note='Downloading annotations',
2449                     errnote='Unable to download video annotations', fatal=False,
2450                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2451
2452         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2453
2454         # Look for the DASH manifest
2455         if self._downloader.params.get('youtube_include_dash_manifest', True):
2456             dash_mpd_fatal = True
2457             for mpd_url in dash_mpds:
2458                 dash_formats = {}
2459                 try:
2460                     def decrypt_sig(mobj):
                             # re.sub() callback applied to mpd_url: group(1) is the
                             # value that follows "/s/" in the manifest URL. It is run
                             # through self._decrypt_signature and the whole "/s/<value>"
                             # segment is replaced by "/signature/<result>".
2461                         s = mobj.group(1)
2462                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2463                         return '/signature/%s' % dec_s
2464
2465                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2466
2467                     for df in self._extract_mpd_formats(
2468                             mpd_url, video_id, fatal=dash_mpd_fatal,
2469                             formats_dict=self._formats):
2470                         if not df.get('filesize'):
2471                             df['filesize'] = _extract_filesize(df['url'])
2472                         # Do not overwrite DASH format found in some previous DASH manifest
2473                         if df['format_id'] not in dash_formats:
2474                             dash_formats[df['format_id']] = df
2475                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2476                         # allow them to fail without bug report message if we already have
2477                         # some DASH manifest succeeded. This is temporary workaround to reduce
2478                         # burst of bug reports until we figure out the reason and whether it
2479                         # can be fixed at all.
2480                         dash_mpd_fatal = False
2481                 except (ExtractorError, KeyError) as e:
2482                     self.report_warning(
2483                         'Skipping DASH manifest: %r' % e, video_id)
2484                 if dash_formats:
2485                     # Remove the formats we found through non-DASH, they
2486                     # contain less info and it can be wrong, because we use
2487                     # fixed values (for example the resolution). See
2488                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2489                     # example.
2490                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2491                     formats.extend(dash_formats.values())
2492
2493         # Check for malformed aspect ratio
2494         stretched_m = re.search(
2495             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2496             video_webpage)
2497         if stretched_m:
2498             w = float(stretched_m.group('w'))
2499             h = float(stretched_m.group('h'))
2500             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2501             # We will only process correct ratios.
2502             if w > 0 and h > 0:
2503                 ratio = w / h
2504                 for f in formats:
2505                     if f.get('vcodec') != 'none':
2506                         f['stretched_ratio'] = ratio
2507
2508         if not formats:
2509             if 'reason' in video_info:
2510                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2511                     regions_allowed = self._html_search_meta(
2512                         'regionsAllowed', video_webpage, default=None)
2513                     countries = regions_allowed.split(',') if regions_allowed else None
2514                     self.raise_geo_restricted(
2515                         msg=video_info['reason'][0], countries=countries)
2516                 reason = video_info['reason'][0]
2517                 if 'Invalid parameters' in reason:
2518                     unavailable_message = extract_unavailable_message()
2519                     if unavailable_message:
2520                         reason = unavailable_message
2521                 raise ExtractorError(
2522                     'YouTube said: %s' % reason,
2523                     expected=True, video_id=video_id)
2524             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2525                 raise ExtractorError('This video is DRM protected.', expected=True)
2526
2527         self._sort_formats(formats)
2528
2529         self.mark_watched(video_id, video_info, player_response)
2530
2531         return {
2532             'id': video_id,
2533             'uploader': video_uploader,
2534             'uploader_id': video_uploader_id,
2535             'uploader_url': video_uploader_url,
2536             'channel_id': channel_id,
2537             'channel_url': channel_url,
2538             'upload_date': upload_date,
2539             'license': video_license,
2540             'creator': video_creator or artist,
2541             'title': video_title,
2542             'alt_title': video_alt_title or track,
2543             'thumbnails': thumbnails,
2544             'description': video_description,
2545             'categories': video_categories,
2546             'tags': video_tags,
2547             'subtitles': video_subtitles,
2548             'automatic_captions': automatic_captions,
2549             'duration': video_duration,
2550             'age_limit': 18 if age_gate else 0,
2551             'annotations': video_annotations,
2552             'chapters': chapters,
2553             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2554             'view_count': view_count,
2555             'like_count': like_count,
2556             'dislike_count': dislike_count,
2557             'average_rating': average_rating,
2558             'formats': formats,
2559             'is_live': is_live,
2560             'start_time': start_time,
2561             'end_time': end_time,
2562             'series': series,
2563             'season_number': season_number,
2564             'episode_number': episode_number,
2565             'track': track,
2566             'artist': artist,
2567             'album': album,
2568             'release_date': release_date,
2569             'release_year': release_year,
2570             'subscriber_count': subscriber_count,
2571         }
2572
2573
2574 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2575     IE_DESC = 'YouTube.com playlists'
2576     _VALID_URL = r"""(?x)(?:
2577                         (?:https?://)?
2578                         (?:\w+\.)?
2579                         (?:
2580                             (?:
2581                                 youtube(?:kids)?\.com|
2582                                 invidio\.us
2583                             )
2584                             /
2585                             (?:
2586                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2587                                \? (?:.*?[&;])*? (?:p|a|list)=
2588                             |  p/
2589                             )|
2590                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2591                         )
2592                         (
2593                             (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2594                             # Top tracks, they can also include dots
2595                             |(?:MC)[\w\.]*
2596                         )
2597                         .*
2598                      |
2599                         (%(playlist_id)s)
2600                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2601     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2602     _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2603     _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2604     IE_NAME = 'youtube:playlist'
2605     _TESTS = [{
2606         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2607         'info_dict': {
2608             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2609             'uploader': 'Sergey M.',
2610             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2611             'title': 'youtube-dl public playlist',
2612         },
2613         'playlist_count': 1,
2614     }, {
2615         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2616         'info_dict': {
2617             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2618             'uploader': 'Sergey M.',
2619             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2620             'title': 'youtube-dl empty playlist',
2621         },
2622         'playlist_count': 0,
2623     }, {
2624         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2625         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2626         'info_dict': {
2627             'title': '29C3: Not my department',
2628             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2629             'uploader': 'Christiaan008',
2630             'uploader_id': 'ChRiStIaAn008',
2631         },
2632         'playlist_count': 96,
2633     }, {
2634         'note': 'issue #673',
2635         'url': 'PLBB231211A4F62143',
2636         'info_dict': {
2637             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2638             'id': 'PLBB231211A4F62143',
2639             'uploader': 'Wickydoo',
2640             'uploader_id': 'Wickydoo',
2641         },
2642         'playlist_mincount': 26,
2643     }, {
2644         'note': 'Large playlist',
2645         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2646         'info_dict': {
2647             'title': 'Uploads from Cauchemar',
2648             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2649             'uploader': 'Cauchemar',
2650             'uploader_id': 'Cauchemar89',
2651         },
2652         'playlist_mincount': 799,
2653     }, {
2654         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2655         'info_dict': {
2656             'title': 'YDL_safe_search',
2657             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2658         },
2659         'playlist_count': 2,
2660         'skip': 'This playlist is private',
2661     }, {
2662         'note': 'embedded',
2663         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2664         'playlist_count': 4,
2665         'info_dict': {
2666             'title': 'JODA15',
2667             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2668             'uploader': 'milan',
2669             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2670         }
2671     }, {
2672         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2673         'playlist_mincount': 485,
2674         'info_dict': {
2675             'title': '2018 Chinese New Singles (11/6 updated)',
2676             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2677             'uploader': 'LBK',
2678             'uploader_id': 'sdragonfang',
2679         }
2680     }, {
2681         'note': 'Embedded SWF player',
2682         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2683         'playlist_count': 4,
2684         'info_dict': {
2685             'title': 'JODA7',
2686             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2687         },
2688         'skip': 'This playlist does not exist',
2689     }, {
2690         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2691         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2692         'info_dict': {
2693             'title': 'Uploads from Interstellar Movie',
2694             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2695             'uploader': 'Interstellar Movie',
2696             'uploader_id': 'InterstellarMovie1',
2697         },
2698         'playlist_mincount': 21,
2699     }, {
2700         # Playlist URL that does not actually serve a playlist
2701         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2702         'info_dict': {
2703             'id': 'FqZTN594JQw',
2704             'ext': 'webm',
2705             'title': "Smiley's People 01 detective, Adventure Series, Action",
2706             'uploader': 'STREEM',
2707             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2708             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2709             'upload_date': '20150526',
2710             'license': 'Standard YouTube License',
2711             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2712             'categories': ['People & Blogs'],
2713             'tags': list,
2714             'view_count': int,
2715             'like_count': int,
2716             'dislike_count': int,
2717         },
2718         'params': {
2719             'skip_download': True,
2720         },
2721         'skip': 'This video is not available.',
2722         'add_ie': [YoutubeIE.ie_key()],
2723     }, {
2724         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2725         'info_dict': {
2726             'id': 'yeWKywCrFtk',
2727             'ext': 'mp4',
2728             'title': 'Small Scale Baler and Braiding Rugs',
2729             'uploader': 'Backus-Page House Museum',
2730             'uploader_id': 'backuspagemuseum',
2731             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2732             'upload_date': '20161008',
2733             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2734             'categories': ['Nonprofits & Activism'],
2735             'tags': list,
2736             'like_count': int,
2737             'dislike_count': int,
2738         },
2739         'params': {
2740             'noplaylist': True,
2741             'skip_download': True,
2742         },
2743     }, {
2744         # https://github.com/ytdl-org/youtube-dl/issues/21844
2745         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2746         'info_dict': {
2747             'title': 'Data Analysis with Dr Mike Pound',
2748             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2749             'uploader_id': 'Computerphile',
2750             'uploader': 'Computerphile',
2751         },
2752         'playlist_mincount': 11,
2753     }, {
2754         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2755         'only_matching': True,
2756     }, {
2757         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2758         'only_matching': True,
2759     }, {
2760         # music album playlist
2761         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2762         'only_matching': True,
2763     }, {
2764         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2765         'only_matching': True,
2766     }, {
2767         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2768         'only_matching': True,
2769     }]
2770
    def _real_initialize(self):
        """Initialization hook: delegate to self._login()."""
        # _login() is not defined in this class; it is presumably inherited
        # from the YouTube base extractor -- see it for the behaviour when no
        # credentials are configured.
        self._login()
2773
2774     def extract_videos_from_page(self, page):
2775         ids_in_page = []
2776         titles_in_page = []
2777
2778         for item in re.findall(
2779                 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2780             attrs = extract_attributes(item)
2781             video_id = attrs['data-video-id']
2782             video_title = unescapeHTML(attrs.get('data-title'))
2783             if video_title:
2784                 video_title = video_title.strip()
2785             ids_in_page.append(video_id)
2786             titles_in_page.append(video_title)
2787
2788         # Fallback with old _VIDEO_RE
2789         self.extract_videos_from_page_impl(
2790             self._VIDEO_RE, page, ids_in_page, titles_in_page)
2791
2792         # Relaxed fallbacks
2793         self.extract_videos_from_page_impl(
2794             r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2795             ids_in_page, titles_in_page)
2796         self.extract_videos_from_page_impl(
2797             r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2798             ids_in_page, titles_in_page)
2799
2800         return zip(ids_in_page, titles_in_page)
2801
2802     def _extract_mix(self, playlist_id):
2803         # The mixes are generated from a single video
2804         # the id of the playlist is just 'RD' + video_id
2805         ids = []
2806         last_id = playlist_id[-11:]
2807         for n in itertools.count(1):
2808             url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2809             webpage = self._download_webpage(
2810                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2811             new_ids = orderedSet(re.findall(
2812                 r'''(?xs)data-video-username=".*?".*?
2813                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2814                 webpage))
2815             # Fetch new pages until all the videos are repeated, it seems that
2816             # there are always 51 unique videos.
2817             new_ids = [_id for _id in new_ids if _id not in ids]
2818             if not new_ids:
2819                 break
2820             ids.extend(new_ids)
2821             last_id = ids[-1]
2822
2823         url_results = self._ids_to_results(ids)
2824
2825         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2826         title_span = (
2827             search_title('playlist-title')
2828             or search_title('title long-title')
2829             or search_title('title'))
2830         title = clean_html(title_span)
2831
2832         return self.playlist_result(url_results, playlist_id, title)
2833
2834     def _extract_playlist(self, playlist_id):
2835         url = self._TEMPLATE_URL % playlist_id
2836         page = self._download_webpage(url, playlist_id)
2837
2838         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2839         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2840             match = match.strip()
2841             # Check if the playlist exists or is private
2842             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2843             if mobj:
2844                 reason = mobj.group('reason')
2845                 message = 'This playlist %s' % reason
2846                 if 'private' in reason:
2847                     message += ', use --username or --netrc to access it'
2848                 message += '.'
2849                 raise ExtractorError(message, expected=True)
2850             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2851                 raise ExtractorError(
2852                     'Invalid parameters. Maybe URL is incorrect.',
2853                     expected=True)
2854             elif re.match(r'[^<]*Choose your language[^<]*', match):
2855                 continue
2856             else:
2857                 self.report_warning('Youtube gives an alert message: ' + match)
2858
2859         playlist_title = self._html_search_regex(
2860             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2861             page, 'title', default=None)
2862
2863         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2864         uploader = self._html_search_regex(
2865             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2866             page, 'uploader', default=None)
2867         mobj = re.search(
2868             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2869             page)
2870         if mobj:
2871             uploader_id = mobj.group('uploader_id')
2872             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2873         else:
2874             uploader_id = uploader_url = None
2875
2876         has_videos = True
2877
2878         if not playlist_title:
2879             try:
2880                 # Some playlist URLs don't actually serve a playlist (e.g.
2881                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2882                 next(self._entries(page, playlist_id))
2883             except StopIteration:
2884                 has_videos = False
2885
2886         playlist = self.playlist_result(
2887             self._entries(page, playlist_id), playlist_id, playlist_title)
2888         playlist.update({
2889             'uploader': uploader,
2890             'uploader_id': uploader_id,
2891             'uploader_url': uploader_url,
2892         })
2893
2894         return has_videos, playlist
2895
2896     def _check_download_just_video(self, url, playlist_id):
2897         # Check if it's a video-specific URL
2898         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2899         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2900             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2901             'video id', default=None)
2902         if video_id:
2903             if self._downloader.params.get('noplaylist'):
2904                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2905                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2906             else:
2907                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2908                 return video_id, None
2909         return None, None
2910
2911     def _real_extract(self, url):
2912         # Extract playlist id
2913         mobj = re.match(self._VALID_URL, url)
2914         if mobj is None:
2915             raise ExtractorError('Invalid URL: %s' % url)
2916         playlist_id = mobj.group(1) or mobj.group(2)
2917
2918         video_id, video = self._check_download_just_video(url, playlist_id)
2919         if video:
2920             return video
2921
2922         if playlist_id.startswith(('RD', 'UL', 'PU')):
2923             # Mixes require a custom extraction process
2924             return self._extract_mix(playlist_id)
2925
2926         has_videos, playlist = self._extract_playlist(playlist_id)
2927         if has_videos or not video_id:
2928             return playlist
2929
2930         # Some playlist URLs don't actually serve a playlist (see
2931         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2932         # Fallback to plain video extraction if there is a video id
2933         # along with playlist id.
2934         return self.url_result(video_id, 'Youtube', video_id=video_id)
2935
2936
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for /channel/<id> URLs.

    When a "UC..." channel id can be read from the channel page, the work
    is delegated to the playlist extractor through the matching "UU..."
    playlist.  Otherwise the channel's /videos page is scraped directly.
    """
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the channel's videos page URL built from _TEMPLATE_URL."""
        # *url* is unused here but is part of the signature
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the uploads of a channel.

        Returns a url_result pointing at the "UU..." playlist when a
        "UC..." channel id could be determined, otherwise a playlist
        result built from the channel's videos page.

        Raises ExtractorError (expected) when the videos page yields no
        entries and shows an alert message.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        probe_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if probe_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta: look for the id in the app deep links
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    probe_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the "UC" prefix for "UU" to address the channel as a playlist
            uploads_playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_re = r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"'''

        if re.search(autogenerated_re, channel_page):
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; if there is none, surface the page alert
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert:
                raise ExtractorError('Youtube said: %s' % alert, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3036
3037
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages (/user/<name>, /c/<name>, /<name>, ytuser:<name>).

    Extraction itself is inherited from YoutubeChannelIE; this class only
    changes which URLs are accepted and how the videos page URL is built.
    """
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if no other Youtube*IE in this module does."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for the user named in *url*.

        The path prefix is whatever _VALID_URL captured in its ``user``
        group ('user' or 'c'), defaulting to 'user' when nothing was
        captured.  *channel_id* is not used; the name is re-read from *url*.
        """
        mobj = re.match(self._VALID_URL, url)
        path_prefix = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_prefix, mobj.group('id'))
3095
3096
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for <user|channel|c>/<id>/live URLs.

    Resolves the live page to the video it currently shows, or falls back
    to the URL without the trailing /live.
    """
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the live video or for the base URL.

        The live page is downloaded non-fatally.  If its og:type starts
        with 'video' and its videoId meta is an 11-character id, the
        result points at that video via YoutubeIE.  In every other case
        the result points at the matched ``base_url`` with no extractor
        key given.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id, base_url = mobj.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        if page_type.startswith('video'):
            if video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3147
3148
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Declarative extractor for the "/playlists" page of a /user/, /channel/
    # or /c/ URL; no methods are defined here, everything else is inherited.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'ThirstForScience',
            },
            'playlist_mincount': 4,
        },
        {
            # with "Load more" button
            'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
            'playlist_mincount': 70,
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
            'playlist_mincount': 17,
            'skip': 'Blocked',
        },
        {
            'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
            'only_matching': True,
        },
    ]
3181
3182
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Pattern for a "/watch?v=" link: "id" is the 11-character video id and
    # "title" is the optional value of a following title="..." attribute.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?')
3185
3186
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra URL query parameters merged into the search request; subclasses
    # override this to alter the search (e.g. the sort order)
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until at least n
        videos have been collected, a page yields no videos, or a page has
        no "Next" link.

        query -- the search string
        n     -- maximum number of entries to return (may be float('inf'))

        Returns a playlist result holding at most n entries, with the query
        passed as the playlist id.
        Raises ExtractorError (expected) when a page contains the
        'class="search-message' marker.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as n videos are available: ">=" rather than ">"
            # so that no superfluous page is downloaded when exactly n
            # results have already been collected
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guarded slice: n may be float('inf'), which is not a valid index
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
3235
3236
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Variant of YoutubeSearchIE that only overrides the name, description,
    # search key and the extra query arguments ('search_sort').
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }
3242
3243
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # "query" captures the value of the search_query= (or q=) parameter
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'info_dict': {
                'title': 'youtube-dl test video',
            },
            'playlist_mincount': 5,
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the videos found on a results page as a playlist.

        The decoded search query is used both as the id passed to the page
        download and as the playlist title.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
3264
3265
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            },
            'playlist_mincount': 5,
        },
    ]

    def _real_extract(self, url):
        """Delegate to the base class using the show's "/playlists" URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3283
3284
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, built from the subclass' _FEED_NAME"""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place"""
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video linked from the feed

        page -- HTML of the first feed page

        Further portions are fetched through the "load more" link for as
        long as one is present and the latest portion contained at least one
        video id that has not been seen before.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index

        # A set keeps the "already seen" check cheap on long feeds; the
        # yield order is still the order of appearance on the pages
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                headers=self._YOUTUBE_CLIENT_HEADERS)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist

        The passed URL is not used: the feed page address is built from
        _FEED_NAME, and _PLAYLIST_TITLE is used as the playlist title.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3337
3338
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single video if one is selected, else the 'WL' playlist.

        The second element returned by _check_download_just_video() is used
        as the result when it is truthy; otherwise the second element
        returned by _extract_playlist('WL') is.
        """
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, watch_later = self._extract_playlist('WL')
            return watch_later
        return single_video
3358
3359
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the favourites playlist id and hand it to YoutubePlaylist.

        The id is taken from the first "list=..." occurrence on the
        my_favorites page; the passed URL itself is not used.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
3370
3371
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configuration for the 'recommended' feed
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3377
3378
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configuration for the 'subscriptions' feed.
    # The description names the shortcut with its leading colon, because
    # _VALID_URL only accepts ":ytsubs" / ":ytsubscriptions".
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3384
3385
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configuration for the 'history' feed
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3391
3392
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    # Matches watch URLs that end after at most one of the listed parameters
    # (none of which is "v"), and attribution_link URLs that end after "a"
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at shell quoting."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
3440
3441
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    # Matches watch URLs whose "v" value is only 1 to 10 characters long
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (
            truncated_id, url)
        raise ExtractorError(message, expected=True)