]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/youtube.py
Merge pull request #74 from blackjack4494/master
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 mimetype2ext,
40 orderedSet,
41 parse_codecs,
42 parse_count,
43 parse_duration,
44 remove_quotes,
45 remove_start,
46 smuggle_url,
47 str_or_none,
48 str_to_int,
49 try_get,
50 unescapeHTML,
51 unified_strdate,
52 unsmuggle_url,
53 uppercase_escape,
54 url_or_none,
55 urlencode_postdata,
56 )
57
58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of Google's (undocumented) web sign-in flow used by _login():
    # lookup -> challenge -> (optional TFA) -> CheckCookie.
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches every known playlist-ID prefix (regular, liked, uploads, mixes, albums, ...)
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    # Identify requests as coming from the desktop web client (client-name 1).
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        # Force English (hl=en) via the PREF cookie so scraped pages contain
        # predictable, parseable text regardless of the user's locale.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap each bare video ID in a url_result() dict to be handled by YoutubeIE.
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        # NOTE(review): some failure paths below use a bare `return` (None)
        # rather than `return False`; callers treating the result as a
        # boolean see both as falsy, so behavior is unchanged.
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        # Hidden <input> fields of the login form are carried over into every
        # subsequent signin request.
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST a positional-JSON payload (`f.req`) to a signin endpoint and
            # parse the JSON reply after stripping the anti-XSSI prefix.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Responses start with a non-JSON prefix (e.g. ")]}'"); drop
                # everything up to the first '['.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account by username. The payload is a positional
        # structure reverse-engineered from the web client; do not reorder.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # The opaque account token needed for the password/TFA steps.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password for the looked-up account.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        # A populated x[0][5] entry signals a login error (e.g. bad password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # Step 3 (optional): Google asked for an additional challenge.
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # Session token required to address the TFA endpoint.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users often paste codes with the SMS "G-" prefix; strip it.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                # Same error-entry convention as the password step above.
                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges we cannot solve automatically; tell the user to
                # resolve them in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Step 4: fetch the CheckCookie URL to finalize the session cookies.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through the account page.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Inject disable_polymer=true into every request so YouTube serves the
        # old (non-Polymer) page layout that the scraping regexes expect.
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        # Set the language cookie and attempt login before any extraction.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
288
289
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base for extractors whose pages paginate via a "Load more" button."""

    def _entries(self, page, playlist_id):
        # Yield entries page by page, following the AJAX "Load more" link
        # embedded in each widget until no further link is present.
        content_html = page
        more_widget_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(content_html):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if mobj is None:
                break

            # Intermittent 5xx responses are usually transient, so retry the
            # page download a few times before giving up.
            max_retries = 3
            attempt = 0
            while True:
                try:
                    more = self._download_json(
                        'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
                        'Downloading page #%s%s'
                        % (page_num, ' (retry #%d)' % attempt if attempt else ''),
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                    break
                except ExtractorError as e:
                    is_server_error = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    attempt += 1
                    if not (is_server_error and attempt <= max_retries):
                        raise

            content_html = more['content_html']
            if not content_html.strip():
                # A "Load more" button may be present even when there are no
                # further videos to fetch.
                break
            more_widget_html = more['load_more_widget_html']
328
329
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for playlist-like extractors that scrape video links from a page."""

    def _process_page(self, content):
        # Wrap each scraped (id, title) pair into a url_result() dict that is
        # then handled by YoutubeIE.
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Scan *page* with *video_re*, accumulating IDs/titles in place.

        ids_in_page and titles_in_page are parallel lists updated in place so
        this can be called repeatedly with different regexes; a title found by
        a later match fills in a previously-untitled ID.
        """
        for mobj in re.finditer(video_re, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # Bug fix: compare the 'index' group (playlist position) rather
            # than the 'id' group — an 11-character video ID can never equal
            # '0', so the old check was dead code contradicting the comment.
            if 'index' in mobj.groupdict() and mobj.group('index') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(
                mobj.group('title')) if 'title' in mobj.groupdict() else None
            if video_title:
                video_title = video_title.strip()
            # The "Play all" pseudo-link carries no real video title.
            if video_title == '► Play all':
                video_title = None
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        # Collect (video_id, title) pairs using the subclass-provided _VIDEO_RE.
        ids_in_page = []
        titles_in_page = []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)
361
362
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for extractors listing the playlists found on a channel/user page."""

    def _process_page(self, content):
        # Each "lockup title" heading links to one playlist; orderedSet drops
        # duplicates while preserving on-page order.
        playlist_link_re = r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"'
        for pl_id in orderedSet(re.findall(playlist_link_re, content)):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % pl_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        # Download the first page, then delegate pagination to _entries().
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        page_title = self._og_search_title(webpage, fatal=False)
        return self.playlist_result(
            self._entries(webpage, list_id), list_id, page_title)
376
377
378 class YoutubeIE(YoutubeBaseInfoExtractor):
379 IE_DESC = 'YouTube.com'
380 _VALID_URL = r"""(?x)^
381 (
382 (?:https?://|//) # http(s):// or protocol-independent URL
383 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
384 (?:www\.)?deturl\.com/www\.youtube\.com/|
385 (?:www\.)?pwnyoutube\.com/|
386 (?:www\.)?hooktube\.com/|
387 (?:www\.)?yourepeat\.com/|
388 tube\.majestyc\.net/|
389 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
390 (?:(?:www|dev)\.)?invidio\.us/|
391 (?:(?:www|no)\.)?invidiou\.sh/|
392 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
393 (?:www\.)?invidious\.kabi\.tk/|
394 (?:www\.)?invidious\.13ad\.de/|
395 (?:www\.)?invidious\.mastodon\.host/|
396 (?:www\.)?invidious\.nixnet\.xyz/|
397 (?:www\.)?invidious\.drycat\.fr/|
398 (?:www\.)?tube\.poal\.co/|
399 (?:www\.)?vid\.wxzm\.sx/|
400 (?:www\.)?yewtu\.be/|
401 (?:www\.)?yt\.elukerio\.org/|
402 (?:www\.)?yt\.lelux\.fi/|
403 (?:www\.)?invidious\.ggc-project\.de/|
404 (?:www\.)?yt\.maisputain\.ovh/|
405 (?:www\.)?invidious\.13ad\.de/|
406 (?:www\.)?invidious\.toot\.koeln/|
407 (?:www\.)?invidious\.fdn\.fr/|
408 (?:www\.)?watch\.nettohikari\.com/|
409 (?:www\.)?kgg2m7yk5aybusll\.onion/|
410 (?:www\.)?qklhadlycap4cnod\.onion/|
411 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
412 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
413 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
414 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
415 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
416 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
417 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
418 (?:.*?\#/)? # handle anchor (#/) redirect urls
419 (?: # the various things that can precede the ID:
420 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
421 |(?: # or the v= param in all its forms
422 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
423 (?:\?|\#!?) # the params delimiter ? or # or #!
424 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
425 v=
426 )
427 ))
428 |(?:
429 youtu\.be| # just youtu.be/xxxx
430 vid\.plus| # or vid.plus/xxxx
431 zwearz\.com/watch| # or zwearz.com/watch/xxxx
432 )/
433 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
434 )
435 )? # all until now is optional -> you can pass the naked ID
436 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
437 (?!.*?\blist=
438 (?:
439 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
440 WL # WL are handled by the watch later IE
441 )
442 )
443 (?(1).+)? # if we found the ID, everything can follow
444 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
445 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
446 _PLAYER_INFO_RE = (
447 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
448 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
449 )
450 _formats = {
451 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
452 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
453 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
454 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
455 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
456 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
457 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
458 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
459 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
460 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
461 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
462 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
463 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
464 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
465 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
466 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
467 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
468 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
469
470
471 # 3D videos
472 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
473 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
474 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
475 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
476 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
477 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
478 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
479
480 # Apple HTTP Live Streaming
481 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
482 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
483 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
484 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
485 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
486 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
487 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
488 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
489
490 # DASH mp4 video
491 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
494 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
497 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
498 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
499 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
500 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
501 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
502 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
503
504 # Dash mp4 audio
505 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
506 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
507 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
508 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
509 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
510 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
511 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
512
513 # Dash webm
514 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
517 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
518 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
519 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
520 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
521 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
527 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
528 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
529 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
530 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
532 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
533 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
534 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
536
537 # Dash webm audio
538 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
539 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
540
541 # Dash webm audio with opus inside
542 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
543 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
544 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
545
546 # RTMP (unnamed)
547 '_rtmp': {'protocol': 'rtmp'},
548
549 # av01 video only formats sometimes served with "unknown" codecs
550 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
552 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
553 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
554 }
555 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
556
557 _GEO_BYPASS = False
558
559 IE_NAME = 'youtube'
560 _TESTS = [
561 {
562 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
563 'info_dict': {
564 'id': 'BaW_jenozKc',
565 'ext': 'mp4',
566 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
567 'uploader': 'Philipp Hagemeister',
568 'uploader_id': 'phihag',
569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
570 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
571 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
572 'upload_date': '20121002',
573 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
574 'categories': ['Science & Technology'],
575 'tags': ['youtube-dl'],
576 'duration': 10,
577 'view_count': int,
578 'like_count': int,
579 'dislike_count': int,
580 'start_time': 1,
581 'end_time': 9,
582 }
583 },
584 {
585 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
586 'note': 'Embed-only video (#1746)',
587 'info_dict': {
588 'id': 'yZIXLfi8CZQ',
589 'ext': 'mp4',
590 'upload_date': '20120608',
591 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
592 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
593 'uploader': 'SET India',
594 'uploader_id': 'setindia',
595 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
596 'age_limit': 18,
597 }
598 },
599 {
600 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
601 'note': 'Use the first video ID in the URL',
602 'info_dict': {
603 'id': 'BaW_jenozKc',
604 'ext': 'mp4',
605 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
606 'uploader': 'Philipp Hagemeister',
607 'uploader_id': 'phihag',
608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
609 'upload_date': '20121002',
610 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
611 'categories': ['Science & Technology'],
612 'tags': ['youtube-dl'],
613 'duration': 10,
614 'view_count': int,
615 'like_count': int,
616 'dislike_count': int,
617 },
618 'params': {
619 'skip_download': True,
620 },
621 },
622 {
623 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
624 'note': '256k DASH audio (format 141) via DASH manifest',
625 'info_dict': {
626 'id': 'a9LDPn-MO4I',
627 'ext': 'm4a',
628 'upload_date': '20121002',
629 'uploader_id': '8KVIDEO',
630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
631 'description': '',
632 'uploader': '8KVIDEO',
633 'title': 'UHDTV TEST 8K VIDEO.mp4'
634 },
635 'params': {
636 'youtube_include_dash_manifest': True,
637 'format': '141',
638 },
639 'skip': 'format 141 not served anymore',
640 },
641 # Controversy video
642 {
643 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
644 'info_dict': {
645 'id': 'T4XJQO3qol8',
646 'ext': 'mp4',
647 'duration': 219,
648 'upload_date': '20100909',
649 'uploader': 'Amazing Atheist',
650 'uploader_id': 'TheAmazingAtheist',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
652 'title': 'Burning Everyone\'s Koran',
653 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
654 }
655 },
656 # Normal age-gate video (embed allowed)
657 {
658 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
659 'info_dict': {
660 'id': 'HtVdAasjOgU',
661 'ext': 'mp4',
662 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
663 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
664 'duration': 142,
665 'uploader': 'The Witcher',
666 'uploader_id': 'WitcherGame',
667 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
668 'upload_date': '20140605',
669 'age_limit': 18,
670 },
671 },
672 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
673 {
674 'url': 'lqQg6PlCWgI',
675 'info_dict': {
676 'id': 'lqQg6PlCWgI',
677 'ext': 'mp4',
678 'duration': 6085,
679 'upload_date': '20150827',
680 'uploader_id': 'olympic',
681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
682 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
683 'uploader': 'Olympic',
684 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
685 },
686 'params': {
687 'skip_download': 'requires avconv',
688 }
689 },
690 # Non-square pixels
691 {
692 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
693 'info_dict': {
694 'id': '_b-2C3KPAM0',
695 'ext': 'mp4',
696 'stretched_ratio': 16 / 9.,
697 'duration': 85,
698 'upload_date': '20110310',
699 'uploader_id': 'AllenMeow',
700 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
701 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
702 'uploader': '孫ᄋᄅ',
703 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
704 },
705 },
706 # url_encoded_fmt_stream_map is empty string
707 {
708 'url': 'qEJwOuvDf7I',
709 'info_dict': {
710 'id': 'qEJwOuvDf7I',
711 'ext': 'webm',
712 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
713 'description': '',
714 'upload_date': '20150404',
715 'uploader_id': 'spbelect',
716 'uploader': 'Наблюдатели Петербурга',
717 },
718 'params': {
719 'skip_download': 'requires avconv',
720 },
721 'skip': 'This live event has ended.',
722 },
723 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
724 {
725 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
726 'info_dict': {
727 'id': 'FIl7x6_3R5Y',
728 'ext': 'webm',
729 'title': 'md5:7b81415841e02ecd4313668cde88737a',
730 'description': 'md5:116377fd2963b81ec4ce64b542173306',
731 'duration': 220,
732 'upload_date': '20150625',
733 'uploader_id': 'dorappi2000',
734 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
735 'uploader': 'dorappi2000',
736 'formats': 'mincount:31',
737 },
738 'skip': 'not actual anymore',
739 },
740 # DASH manifest with segment_list
741 {
742 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
743 'md5': '8ce563a1d667b599d21064e982ab9e31',
744 'info_dict': {
745 'id': 'CsmdDsKjzN8',
746 'ext': 'mp4',
747 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
748 'uploader': 'Airtek',
749 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
750 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
751 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
752 },
753 'params': {
754 'youtube_include_dash_manifest': True,
755 'format': '135', # bestvideo
756 },
757 'skip': 'This live event has ended.',
758 },
759 {
760 # Multifeed videos (multiple cameras), URL is for Main Camera
761 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
762 'info_dict': {
763 'id': 'jqWvoWXjCVs',
764 'title': 'teamPGP: Rocket League Noob Stream',
765 'description': 'md5:dc7872fb300e143831327f1bae3af010',
766 },
767 'playlist': [{
768 'info_dict': {
769 'id': 'jqWvoWXjCVs',
770 'ext': 'mp4',
771 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
772 'description': 'md5:dc7872fb300e143831327f1bae3af010',
773 'duration': 7335,
774 'upload_date': '20150721',
775 'uploader': 'Beer Games Beer',
776 'uploader_id': 'beergamesbeer',
777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
778 'license': 'Standard YouTube License',
779 },
780 }, {
781 'info_dict': {
782 'id': '6h8e8xoXJzg',
783 'ext': 'mp4',
784 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
785 'description': 'md5:dc7872fb300e143831327f1bae3af010',
786 'duration': 7337,
787 'upload_date': '20150721',
788 'uploader': 'Beer Games Beer',
789 'uploader_id': 'beergamesbeer',
790 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
791 'license': 'Standard YouTube License',
792 },
793 }, {
794 'info_dict': {
795 'id': 'PUOgX5z9xZw',
796 'ext': 'mp4',
797 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
798 'description': 'md5:dc7872fb300e143831327f1bae3af010',
799 'duration': 7337,
800 'upload_date': '20150721',
801 'uploader': 'Beer Games Beer',
802 'uploader_id': 'beergamesbeer',
803 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
804 'license': 'Standard YouTube License',
805 },
806 }, {
807 'info_dict': {
808 'id': 'teuwxikvS5k',
809 'ext': 'mp4',
810 'title': 'teamPGP: Rocket League Noob Stream (zim)',
811 'description': 'md5:dc7872fb300e143831327f1bae3af010',
812 'duration': 7334,
813 'upload_date': '20150721',
814 'uploader': 'Beer Games Beer',
815 'uploader_id': 'beergamesbeer',
816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
817 'license': 'Standard YouTube License',
818 },
819 }],
820 'params': {
821 'skip_download': True,
822 },
823 'skip': 'This video is not available.',
824 },
825 {
826 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
827 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
828 'info_dict': {
829 'id': 'gVfLd0zydlo',
830 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
831 },
832 'playlist_count': 2,
833 'skip': 'Not multifeed anymore',
834 },
835 {
836 'url': 'https://vid.plus/FlRa-iH7PGw',
837 'only_matching': True,
838 },
839 {
840 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
841 'only_matching': True,
842 },
843 {
844 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
845 # Also tests cut-off URL expansion in video description (see
846 # https://github.com/ytdl-org/youtube-dl/issues/1892,
847 # https://github.com/ytdl-org/youtube-dl/issues/8164)
848 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
849 'info_dict': {
850 'id': 'lsguqyKfVQg',
851 'ext': 'mp4',
852 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
853 'alt_title': 'Dark Walk - Position Music',
854 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
855 'duration': 133,
856 'upload_date': '20151119',
857 'uploader_id': 'IronSoulElf',
858 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
859 'uploader': 'IronSoulElf',
860 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
861 'track': 'Dark Walk - Position Music',
862 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
863 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
864 },
865 'params': {
866 'skip_download': True,
867 },
868 },
869 {
870 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
871 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
872 'only_matching': True,
873 },
874 {
875 # Video with yt:stretch=17:0
876 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
877 'info_dict': {
878 'id': 'Q39EVAstoRM',
879 'ext': 'mp4',
880 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
881 'description': 'md5:ee18a25c350637c8faff806845bddee9',
882 'upload_date': '20151107',
883 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
884 'uploader': 'CH GAMER DROID',
885 },
886 'params': {
887 'skip_download': True,
888 },
889 'skip': 'This video does not exist.',
890 },
891 {
892 # Video licensed under Creative Commons
893 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
894 'info_dict': {
895 'id': 'M4gD1WSo5mA',
896 'ext': 'mp4',
897 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
898 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
899 'duration': 721,
900 'upload_date': '20150127',
901 'uploader_id': 'BerkmanCenter',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
903 'uploader': 'The Berkman Klein Center for Internet & Society',
904 'license': 'Creative Commons Attribution license (reuse allowed)',
905 },
906 'params': {
907 'skip_download': True,
908 },
909 },
910 {
911 # Channel-like uploader_url
912 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
913 'info_dict': {
914 'id': 'eQcmzGIKrzg',
915 'ext': 'mp4',
916 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
917 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
918 'duration': 4060,
919 'upload_date': '20151119',
920 'uploader': 'Bernie Sanders',
921 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
922 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
923 'license': 'Creative Commons Attribution license (reuse allowed)',
924 },
925 'params': {
926 'skip_download': True,
927 },
928 },
929 {
930 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
931 'only_matching': True,
932 },
933 {
934 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
935 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
936 'only_matching': True,
937 },
938 {
939 # Rental video preview
940 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
941 'info_dict': {
942 'id': 'uGpuVWrhIzE',
943 'ext': 'mp4',
944 'title': 'Piku - Trailer',
945 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
946 'upload_date': '20150811',
947 'uploader': 'FlixMatrix',
948 'uploader_id': 'FlixMatrixKaravan',
949 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
950 'license': 'Standard YouTube License',
951 },
952 'params': {
953 'skip_download': True,
954 },
955 'skip': 'This video is not available.',
956 },
957 {
958 # YouTube Red video with episode data
959 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
960 'info_dict': {
961 'id': 'iqKdEhx-dD4',
962 'ext': 'mp4',
963 'title': 'Isolation - Mind Field (Ep 1)',
964 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
965 'duration': 2085,
966 'upload_date': '20170118',
967 'uploader': 'Vsauce',
968 'uploader_id': 'Vsauce',
969 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
970 'series': 'Mind Field',
971 'season_number': 1,
972 'episode_number': 1,
973 },
974 'params': {
975 'skip_download': True,
976 },
977 'expected_warnings': [
978 'Skipping DASH manifest',
979 ],
980 },
981 {
982 # The following content has been identified by the YouTube community
983 # as inappropriate or offensive to some audiences.
984 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
985 'info_dict': {
986 'id': '6SJNVb0GnPI',
987 'ext': 'mp4',
988 'title': 'Race Differences in Intelligence',
989 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
990 'duration': 965,
991 'upload_date': '20140124',
992 'uploader': 'New Century Foundation',
993 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
994 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
995 },
996 'params': {
997 'skip_download': True,
998 },
999 },
1000 {
1001 # itag 212
1002 'url': '1t24XAntNCY',
1003 'only_matching': True,
1004 },
1005 {
1006 # geo restricted to JP
1007 'url': 'sJL6WA-aGkQ',
1008 'only_matching': True,
1009 },
1010 {
1011 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1012 'only_matching': True,
1013 },
1014 {
1015 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1016 'only_matching': True,
1017 },
1018 {
1019 # DRM protected
1020 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1021 'only_matching': True,
1022 },
1023 {
1024 # Video with unsupported adaptive stream type formats
1025 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1026 'info_dict': {
1027 'id': 'Z4Vy8R84T1U',
1028 'ext': 'mp4',
1029 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1030 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1031 'duration': 433,
1032 'upload_date': '20130923',
1033 'uploader': 'Amelia Putri Harwita',
1034 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1035 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1036 'formats': 'maxcount:10',
1037 },
1038 'params': {
1039 'skip_download': True,
1040 'youtube_include_dash_manifest': False,
1041 },
1042 'skip': 'not actual anymore',
1043 },
1044 {
1045 # Youtube Music Auto-generated description
1046 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1047 'info_dict': {
1048 'id': 'MgNrAu2pzNs',
1049 'ext': 'mp4',
1050 'title': 'Voyeur Girl',
1051 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1052 'upload_date': '20190312',
1053 'uploader': 'Stephen - Topic',
1054 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1055 'artist': 'Stephen',
1056 'track': 'Voyeur Girl',
1057 'album': 'it\'s too much love to know my dear',
1058 'release_date': '20190313',
1059 'release_year': 2019,
1060 },
1061 'params': {
1062 'skip_download': True,
1063 },
1064 },
1065 {
1066 # Youtube Music Auto-generated description
1067 # Retrieve 'artist' field from 'Artist:' in video description
1068 # when it is present on youtube music video
1069 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1070 'info_dict': {
1071 'id': 'k0jLE7tTwjY',
1072 'ext': 'mp4',
1073 'title': 'Latch Feat. Sam Smith',
1074 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1075 'upload_date': '20150110',
1076 'uploader': 'Various Artists - Topic',
1077 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1078 'artist': 'Disclosure',
1079 'track': 'Latch Feat. Sam Smith',
1080 'album': 'Latch Featuring Sam Smith',
1081 'release_date': '20121008',
1082 'release_year': 2012,
1083 },
1084 'params': {
1085 'skip_download': True,
1086 },
1087 },
1088 {
1089 # Youtube Music Auto-generated description
1090 # handle multiple artists on youtube music video
1091 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1092 'info_dict': {
1093 'id': '74qn0eJSjpA',
1094 'ext': 'mp4',
1095 'title': 'Eastside',
1096 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1097 'upload_date': '20180710',
1098 'uploader': 'Benny Blanco - Topic',
1099 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1100 'artist': 'benny blanco, Halsey, Khalid',
1101 'track': 'Eastside',
1102 'album': 'Eastside',
1103 'release_date': '20180713',
1104 'release_year': 2018,
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
1109 },
1110 {
1111 # Youtube Music Auto-generated description
1112 # handle youtube music video with release_year and no release_date
1113 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1114 'info_dict': {
1115 'id': '-hcAI0g-f5M',
1116 'ext': 'mp4',
1117 'title': 'Put It On Me',
1118 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1119 'upload_date': '20180426',
1120 'uploader': 'Matt Maeson - Topic',
1121 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1122 'artist': 'Matt Maeson',
1123 'track': 'Put It On Me',
1124 'album': 'The Hearse',
1125 'release_date': None,
1126 'release_year': 2018,
1127 },
1128 'params': {
1129 'skip_download': True,
1130 },
1131 },
1132 {
1133 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1134 'only_matching': True,
1135 },
1136 {
1137 # invalid -> valid video id redirection
1138 'url': 'DJztXj2GPfl',
1139 'info_dict': {
1140 'id': 'DJztXj2GPfk',
1141 'ext': 'mp4',
1142 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1143 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1144 'upload_date': '20090125',
1145 'uploader': 'Prochorowka',
1146 'uploader_id': 'Prochorowka',
1147 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1148 'artist': 'Panjabi MC',
1149 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1150 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1151 },
1152 'params': {
1153 'skip_download': True,
1154 },
1155 },
1156 {
1157 # empty description results in an empty string
1158 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1159 'info_dict': {
1160 'id': 'x41yOUIvK2k',
1161 'ext': 'mp4',
1162 'title': 'IMG 3456',
1163 'description': '',
1164 'upload_date': '20170613',
1165 'uploader_id': 'ElevageOrVert',
1166 'uploader': 'ElevageOrVert',
1167 },
1168 'params': {
1169 'skip_download': True,
1170 },
1171 },
1172 ]
1173
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and set up the per-instance cache of
        signature-deciphering functions used by _decrypt_signature."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature cache id) -> deciphering function
        self._player_cache = {}
1177
1178 def report_video_info_webpage_download(self, video_id):
1179 """Report attempt to download video info webpage."""
1180 self.to_screen('%s: Downloading video info webpage' % video_id)
1181
1182 def report_information_extraction(self, video_id):
1183 """Report attempt to extract video information."""
1184 self.to_screen('%s: Extracting video information' % video_id)
1185
1186 def report_unavailable_format(self, video_id, format):
1187 """Report extracted video URL."""
1188 self.to_screen('%s: Format %s not available' % (video_id, format))
1189
1190 def report_rtmp_download(self):
1191 """Indicate the download will use the RTMP protocol."""
1192 self.to_screen('RTMP download detected')
1193
1194 def _signature_cache_id(self, example_sig):
1195 """ Return a string representation of a signature """
1196 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1197
1198 @classmethod
1199 def _extract_player_info(cls, player_url):
1200 for player_re in cls._PLAYER_INFO_RE:
1201 id_m = re.search(player_re, player_url)
1202 if id_m:
1203 break
1204 else:
1205 raise ExtractorError('Cannot identify player %r' % player_url)
1206 return id_m.group('ext'), id_m.group('id')
1207
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a function deciphering scrambled signatures for this player.

        The deciphered character mapping is cached on disk, keyed by player
        type/id and the shape of *example_sig*, so each player version only
        has to be downloaded and parsed once.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as the cache file name, so it must not contain
        # path separators
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of source indices: the deciphering
            # function is a fixed selection/permutation of input characters.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            # _extract_player_info only yields 'js' or 'swf' extensions
            assert False, 'Invalid player type %r' % player_type

        # Probe the function with a string of distinct characters so the
        # resulting index mapping can be recorded and cached.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1247
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the deciphering function *func*,
        reconstructed by probing it with a string of distinct characters."""
        def gen_sig_code(idxs):
            # Compress the index list into s[...] slice expressions wherever
            # consecutive indices form a run with step +1 or -1.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it, or flush it as a slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new unit-step run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index
                    yield 's[%d]' % prev
            # Emit the final element (i holds the last loop value) or close
            # the trailing run.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1286
    def _parse_sig_js(self, jscode):
        """Locate the signature-deciphering function in the player JavaScript
        and return a wrapper applying it to a single signature string."""
        # Candidate patterns are tried in order; the more specific current
        # ones come first so the obsolete ones cannot shadow them.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # JSInterpreter functions take their arguments as a list
        return lambda s: initial_function([s])
1307
1308 def _parse_sig_swf(self, file_contents):
1309 swfi = SWFInterpreter(file_contents)
1310 TARGET_CLASSNAME = 'SignatureDecipher'
1311 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1312 initial_function = swfi.extract_function(searched_class, 'decipher')
1313 return lambda s: initial_function([s])
1314
1315 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1316 """Turn the encrypted s field into a working signature"""
1317
1318 if player_url is None:
1319 raise ExtractorError('Cannot decrypt signature without player_url')
1320
1321 if player_url.startswith('//'):
1322 player_url = 'https:' + player_url
1323 elif not re.match(r'https?://', player_url):
1324 player_url = compat_urlparse.urljoin(
1325 'https://www.youtube.com', player_url)
1326 try:
1327 player_id = (player_url, self._signature_cache_id(s))
1328 if player_id not in self._player_cache:
1329 func = self._extract_signature_function(
1330 video_id, player_url, s
1331 )
1332 self._player_cache[player_id] = func
1333 func = self._player_cache[player_id]
1334 if self._downloader.params.get('youtube_print_sig_code'):
1335 self._print_sig_code(func, s)
1336 return func(s)
1337 except Exception as e:
1338 tb = traceback.format_exc()
1339 raise ExtractorError(
1340 'Signature extraction failed: ' + tb, cause=e)
1341
1342 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1343 try:
1344 subs_doc = self._download_xml(
1345 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1346 video_id, note=False)
1347 except ExtractorError as err:
1348 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1349 return {}
1350
1351 sub_lang_list = {}
1352 for track in subs_doc.findall('track'):
1353 lang = track.attrib['lang_code']
1354 if lang in sub_lang_list:
1355 continue
1356 sub_formats = []
1357 for ext in self._SUBTITLE_FORMATS:
1358 params = compat_urllib_parse_urlencode({
1359 'lang': lang,
1360 'v': video_id,
1361 'fmt': ext,
1362 'name': track.attrib['name'].encode('utf-8'),
1363 })
1364 sub_formats.append({
1365 'url': 'https://www.youtube.com/api/timedtext?' + params,
1366 'ext': ext,
1367 })
1368 sub_lang_list[lang] = sub_formats
1369 if has_live_chat_replay:
1370 sub_lang_list['live_chat'] = [
1371 {
1372 'video_id': video_id,
1373 'ext': 'json',
1374 'protocol': 'youtube_live_chat_replay',
1375 },
1376 ]
1377 if not sub_lang_list:
1378 self._downloader.report_warning('video doesn\'t have subtitles')
1379 return {}
1380 return sub_lang_list
1381
1382 def _get_ytplayer_config(self, video_id, webpage):
1383 patterns = (
1384 # User data may contain arbitrary character sequences that may affect
1385 # JSON extraction with regex, e.g. when '};' is contained the second
1386 # regex won't capture the whole JSON. Yet working around by trying more
1387 # concrete regex first keeping in mind proper quoted string handling
1388 # to be implemented in future that will replace this workaround (see
1389 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1390 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1391 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1392 r';ytplayer\.config\s*=\s*({.+?});',
1393 )
1394 config = self._search_regex(
1395 patterns, webpage, 'ytplayer.config', default=None)
1396 if config:
1397 return self._parse_json(
1398 uppercase_escape(config), video_id, fatal=False)
1399
1400 def _get_yt_initial_data(self, video_id, webpage):
1401 config = self._search_regex(
1402 (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
1403 r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
1404 webpage, 'ytInitialData', default=None)
1405 if config:
1406 return self._parse_json(
1407 uppercase_escape(config), video_id, fatal=False)
1408
1409 def _get_music_metadata_from_yt_initial(self, yt_initial):
1410 music_metadata = []
1411 key_map = {
1412 'Album': 'album',
1413 'Artist': 'artist',
1414 'Song': 'track'
1415 }
1416 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1417 if type(contents) is list:
1418 for content in contents:
1419 music_track = {}
1420 if type(content) is not dict:
1421 continue
1422 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1423 if type(videoSecondaryInfoRenderer) is not dict:
1424 continue
1425 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1426 if type(rows) is not list:
1427 continue
1428 for row in rows:
1429 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1430 if type(metadataRowRenderer) is not dict:
1431 continue
1432 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1433 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1434 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1435 if type(key) is not str or type(value) is not str:
1436 continue
1437 if key in key_map:
1438 if key_map[key] in music_track:
1439 # we've started on a new track
1440 music_metadata.append(music_track)
1441 music_track = {}
1442 music_track[key_map[key]] = value
1443 if len(music_track.keys()):
1444 music_metadata.append(music_track)
1445 return music_metadata
1446
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Three historical caption sources are tried in order: the legacy
        'ttsurl' endpoint, the player_response captions renderer (format as
        of 22.06.2017), and the oldest caption_tracks args fields.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                # Legacy path: a dedicated timedtext service URL
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build a {lang: [format, ...]} dict by rewriting sub_url's
                # query string with each target language and format.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    caption_tracks = renderer['captionTracks']
                    for caption_track in caption_tracks:
                        if 'kind' not in caption_track:
                            # not an automatic transcription
                            continue
                        base_url = caption_track['baseUrl']
                        sub_lang_list = []
                        for lang in renderer['translationLanguages']:
                            lang_code = lang.get('languageCode')
                            if lang_code:
                                sub_lang_list.append(lang_code)
                        # Only the first automatic track is used
                        return make_captions(base_url, sub_lang_list)

                    self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
                    return {}
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1555
1556 def _mark_watched(self, video_id, video_info, player_response):
1557 playback_url = url_or_none(try_get(
1558 player_response,
1559 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1560 video_info, lambda x: x['videostats_playback_base_url'][0]))
1561 if not playback_url:
1562 return
1563 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1564 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1565
1566 # cpn generation algorithm is reverse engineered from base.js.
1567 # In fact it works even with dummy cpn.
1568 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1569 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1570
1571 qs.update({
1572 'ver': ['2'],
1573 'cpn': [cpn],
1574 })
1575 playback_url = compat_urlparse.urlunparse(
1576 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1577
1578 self._download_webpage(
1579 playback_url, video_id, 'Marking watched',
1580 'Unable to mark watched', fatal=False)
1581
    @staticmethod
    def _extract_urls(webpage):
        """Return all embedded YouTube URLs/video ids found in *webpage*."""
        # Embedded YouTube player (iframe/embed/object/SWFObject variants)
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed (stores only the bare video id)
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # Each match is a tuple of groups; the video id is the last group
        entries.extend(m[-1] for m in matches)

        return entries
1613
1614 @staticmethod
1615 def _extract_url(webpage):
1616 urls = YoutubeIE._extract_urls(webpage)
1617 return urls[0] if urls else None
1618
1619 @classmethod
1620 def extract_id(cls, url):
1621 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1622 if mobj is None:
1623 raise ExtractorError('Invalid URL: %s' % url)
1624 video_id = mobj.group(2)
1625 return video_id
1626
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract chapters from the ytInitialData JSON in the watch page;
        return a list of chapter dicts, or None when unavailable."""
        if not webpage:
            return
        initial_data = self._parse_json(
            self._search_regex(
                r'window\["ytInitialData"\] = (.+);\n', webpage,
                'player args', default='{}'),
            video_id, fatal=False)
        if not initial_data or not isinstance(initial_data, dict):
            return
        # Chapter data lives deep inside the player overlay renderer tree
        chapters_list = try_get(
            initial_data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Start times are given in milliseconds; convert to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # A chapter ends where the next one starts; the last chapter
            # ends at the full video duration
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1675
1676 @staticmethod
1677 def _extract_chapters_from_description(description, duration):
1678 if not description:
1679 return None
1680 chapter_lines = re.findall(
1681 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1682 description)
1683 if not chapter_lines:
1684 return None
1685 chapters = []
1686 for next_num, (chapter_line, time_point) in enumerate(
1687 chapter_lines, start=1):
1688 start_time = parse_duration(time_point)
1689 if start_time is None:
1690 continue
1691 if start_time > duration:
1692 break
1693 end_time = (duration if next_num == len(chapter_lines)
1694 else parse_duration(chapter_lines[next_num][1]))
1695 if end_time is None:
1696 continue
1697 if end_time > duration:
1698 end_time = duration
1699 if start_time > end_time:
1700 break
1701 chapter_title = re.sub(
1702 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1703 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1704 chapters.append({
1705 'start_time': start_time,
1706 'end_time': end_time,
1707 'title': chapter_title,
1708 })
1709 return chapters
1710
1711 def _extract_chapters(self, webpage, description, video_id, duration):
1712 return (self._extract_chapters_from_json(webpage, video_id, duration)
1713 or self._extract_chapters_from_description(description, duration))
1714
1715 def _real_extract(self, url):
1716 url, smuggled_data = unsmuggle_url(url, {})
1717
1718 proto = (
1719 'http' if self._downloader.params.get('prefer_insecure', False)
1720 else 'https')
1721
1722 start_time = None
1723 end_time = None
1724 parsed_url = compat_urllib_parse_urlparse(url)
1725 for component in [parsed_url.fragment, parsed_url.query]:
1726 query = compat_parse_qs(component)
1727 if start_time is None and 't' in query:
1728 start_time = parse_duration(query['t'][0])
1729 if start_time is None and 'start' in query:
1730 start_time = parse_duration(query['start'][0])
1731 if end_time is None and 'end' in query:
1732 end_time = parse_duration(query['end'][0])
1733
1734 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1735 mobj = re.search(self._NEXT_URL_RE, url)
1736 if mobj:
1737 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1738 video_id = self.extract_id(url)
1739
1740 # Get video webpage
1741 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1742 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1743
1744 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1745 video_id = qs.get('v', [None])[0] or video_id
1746
1747 # Attempt to extract SWF player URL
1748 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1749 if mobj is not None:
1750 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1751 else:
1752 player_url = None
1753
1754 dash_mpds = []
1755
1756 def add_dash_mpd(video_info):
1757 dash_mpd = video_info.get('dashmpd')
1758 if dash_mpd and dash_mpd[0] not in dash_mpds:
1759 dash_mpds.append(dash_mpd[0])
1760
1761 def add_dash_mpd_pr(pl_response):
1762 dash_mpd = url_or_none(try_get(
1763 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1764 compat_str))
1765 if dash_mpd and dash_mpd not in dash_mpds:
1766 dash_mpds.append(dash_mpd)
1767
1768 is_live = None
1769 view_count = None
1770
1771 def extract_view_count(v_info):
1772 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1773
1774 def extract_player_response(player_response, video_id):
1775 pl_response = str_or_none(player_response)
1776 if not pl_response:
1777 return
1778 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1779 if isinstance(pl_response, dict):
1780 add_dash_mpd_pr(pl_response)
1781 return pl_response
1782
1783 def extract_embedded_config(embed_webpage, video_id):
1784 embedded_config = self._search_regex(
1785 r'setConfig\(({.*})\);',
1786 embed_webpage, 'ytInitialData', default=None)
1787 if embedded_config:
1788 return embedded_config
1789
1790 player_response = {}
1791
1792 # Get video info
1793 video_info = {}
1794 embed_webpage = None
1795 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1796 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1797 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1798 age_gate = True
1799 # We simulate the access to the video from www.youtube.com/v/{video_id}
1800 # this can be viewed without login into Youtube
1801 url = proto + '://www.youtube.com/embed/%s' % video_id
1802 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1803 ext = extract_embedded_config(embed_webpage, video_id)
1804 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1805 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1806 if not playable_in_embed:
1807 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1808 playable_in_embed = ''
1809 else:
1810 playable_in_embed = playable_in_embed.group('playableinEmbed')
1811 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1812 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1813 if playable_in_embed == 'false':
1814 '''
1815 # TODO apply this patch when Support for Python 2.6(!) and above drops
1816 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1817 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1818 '''
1819 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1820 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1821 age_gate = False
1822 # Try looking directly into the video webpage
1823 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1824 if ytplayer_config:
1825 args = ytplayer_config['args']
1826 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1827 # Convert to the same format returned by compat_parse_qs
1828 video_info = dict((k, [v]) for k, v in args.items())
1829 add_dash_mpd(video_info)
1830 # Rental video is not rented but preview is available (e.g.
1831 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1832 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1833 if not video_info and args.get('ypc_vid'):
1834 return self.url_result(
1835 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1836 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1837 is_live = True
1838 if not player_response:
1839 player_response = extract_player_response(args.get('player_response'), video_id)
1840 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1841 add_dash_mpd_pr(player_response)
1842 else:
1843 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1844 else:
1845 data = compat_urllib_parse_urlencode({
1846 'video_id': video_id,
1847 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1848 'sts': self._search_regex(
1849 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1850 })
1851 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1852 try:
1853 video_info_webpage = self._download_webpage(
1854 video_info_url, video_id,
1855 note='Refetching age-gated info webpage',
1856 errnote='unable to download video info webpage')
1857 except ExtractorError:
1858 video_info_webpage = None
1859 if video_info_webpage:
1860 video_info = compat_parse_qs(video_info_webpage)
1861 pl_response = video_info.get('player_response', [None])[0]
1862 player_response = extract_player_response(pl_response, video_id)
1863 add_dash_mpd(video_info)
1864 view_count = extract_view_count(video_info)
1865 else:
1866 age_gate = False
1867 # Try looking directly into the video webpage
1868 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1869 if ytplayer_config:
1870 args = ytplayer_config['args']
1871 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1872 # Convert to the same format returned by compat_parse_qs
1873 video_info = dict((k, [v]) for k, v in args.items())
1874 add_dash_mpd(video_info)
1875 # Rental video is not rented but preview is available (e.g.
1876 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1877 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1878 if not video_info and args.get('ypc_vid'):
1879 return self.url_result(
1880 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1881 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1882 is_live = True
1883 if not player_response:
1884 player_response = extract_player_response(args.get('player_response'), video_id)
1885 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1886 add_dash_mpd_pr(player_response)
1887
1888 def extract_unavailable_message():
1889 messages = []
1890 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1891 msg = self._html_search_regex(
1892 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1893 video_webpage, 'unavailable %s' % kind, default=None)
1894 if msg:
1895 messages.append(msg)
1896 if messages:
1897 return '\n'.join(messages)
1898
1899 if not video_info and not player_response:
1900 unavailable_message = extract_unavailable_message()
1901 if not unavailable_message:
1902 unavailable_message = 'Unable to extract video data'
1903 raise ExtractorError(
1904 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1905
1906 if not isinstance(video_info, dict):
1907 video_info = {}
1908
1909 video_details = try_get(
1910 player_response, lambda x: x['videoDetails'], dict) or {}
1911
1912 microformat = try_get(
1913 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1914
1915 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1916 if not video_title:
1917 self._downloader.report_warning('Unable to extract video title')
1918 video_title = '_'
1919
1920 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1921 if video_description:
1922
1923 def replace_url(m):
1924 redir_url = compat_urlparse.urljoin(url, m.group(1))
1925 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1926 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1927 qs = compat_parse_qs(parsed_redir_url.query)
1928 q = qs.get('q')
1929 if q and q[0]:
1930 return q[0]
1931 return redir_url
1932
1933 description_original = video_description = re.sub(r'''(?x)
1934 <a\s+
1935 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1936 (?:title|href)="([^"]+)"\s+
1937 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1938 class="[^"]*"[^>]*>
1939 [^<]+\.{3}\s*
1940 </a>
1941 ''', replace_url, video_description)
1942 video_description = clean_html(video_description)
1943 else:
1944 video_description = video_details.get('shortDescription')
1945 if video_description is None:
1946 video_description = self._html_search_meta('description', video_webpage)
1947
1948 if not smuggled_data.get('force_singlefeed', False):
1949 if not self._downloader.params.get('noplaylist'):
1950 multifeed_metadata_list = try_get(
1951 player_response,
1952 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1953 compat_str) or try_get(
1954 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1955 if multifeed_metadata_list:
1956 entries = []
1957 feed_ids = []
1958 for feed in multifeed_metadata_list.split(','):
1959 # Unquote should take place before split on comma (,) since textual
1960 # fields may contain comma as well (see
1961 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1962 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1963
1964 def feed_entry(name):
1965 return try_get(feed_data, lambda x: x[name][0], compat_str)
1966
1967 feed_id = feed_entry('id')
1968 if not feed_id:
1969 continue
1970 feed_title = feed_entry('title')
1971 title = video_title
1972 if feed_title:
1973 title += ' (%s)' % feed_title
1974 entries.append({
1975 '_type': 'url_transparent',
1976 'ie_key': 'Youtube',
1977 'url': smuggle_url(
1978 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1979 {'force_singlefeed': True}),
1980 'title': title,
1981 })
1982 feed_ids.append(feed_id)
1983 self.to_screen(
1984 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1985 % (', '.join(feed_ids), video_id))
1986 return self.playlist_result(entries, video_id, video_title, video_description)
1987 else:
1988 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1989
1990 if view_count is None:
1991 view_count = extract_view_count(video_info)
1992 if view_count is None and video_details:
1993 view_count = int_or_none(video_details.get('viewCount'))
1994 if view_count is None and microformat:
1995 view_count = int_or_none(microformat.get('viewCount'))
1996
1997 if is_live is None:
1998 is_live = bool_or_none(video_details.get('isLive'))
1999
2000 has_live_chat_replay = False
2001 if not is_live:
2002 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2003 try:
2004 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2005 has_live_chat_replay = True
2006 except (KeyError, IndexError, TypeError):
2007 pass
2008
2009 # Check for "rental" videos
2010 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2011 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2012
2013 def _extract_filesize(media_url):
2014 return int_or_none(self._search_regex(
2015 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2016
2017 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2018 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2019
2020 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2021 self.report_rtmp_download()
2022 formats = [{
2023 'format_id': '_rtmp',
2024 'protocol': 'rtmp',
2025 'url': video_info['conn'][0],
2026 'player_url': player_url,
2027 }]
2028 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2029 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2030 if 'rtmpe%3Dyes' in encoded_url_map:
2031 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2032 formats = []
2033 formats_spec = {}
2034 fmt_list = video_info.get('fmt_list', [''])[0]
2035 if fmt_list:
2036 for fmt in fmt_list.split(','):
2037 spec = fmt.split('/')
2038 if len(spec) > 1:
2039 width_height = spec[1].split('x')
2040 if len(width_height) == 2:
2041 formats_spec[spec[0]] = {
2042 'resolution': spec[1],
2043 'width': int_or_none(width_height[0]),
2044 'height': int_or_none(width_height[1]),
2045 }
2046 for fmt in streaming_formats:
2047 itag = str_or_none(fmt.get('itag'))
2048 if not itag:
2049 continue
2050 quality = fmt.get('quality')
2051 quality_label = fmt.get('qualityLabel') or quality
2052 formats_spec[itag] = {
2053 'asr': int_or_none(fmt.get('audioSampleRate')),
2054 'filesize': int_or_none(fmt.get('contentLength')),
2055 'format_note': quality_label,
2056 'fps': int_or_none(fmt.get('fps')),
2057 'height': int_or_none(fmt.get('height')),
2058 # bitrate for itag 43 is always 2147483647
2059 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2060 'width': int_or_none(fmt.get('width')),
2061 }
2062
2063 for fmt in streaming_formats:
2064 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2065 continue
2066 url = url_or_none(fmt.get('url'))
2067
2068 if not url:
2069 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2070 if not cipher:
2071 continue
2072 url_data = compat_parse_qs(cipher)
2073 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2074 if not url:
2075 continue
2076 else:
2077 cipher = None
2078 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2079
2080 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2081 # Unsupported FORMAT_STREAM_TYPE_OTF
2082 if stream_type == 3:
2083 continue
2084
2085 format_id = fmt.get('itag') or url_data['itag'][0]
2086 if not format_id:
2087 continue
2088 format_id = compat_str(format_id)
2089
2090 if cipher:
2091 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2092 ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
2093 jsplayer_url_json = self._search_regex(
2094 ASSETS_RE,
2095 embed_webpage if age_gate else video_webpage,
2096 'JS player URL (1)', default=None)
2097 if not jsplayer_url_json and not age_gate:
2098 # We need the embed website after all
2099 if embed_webpage is None:
2100 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2101 embed_webpage = self._download_webpage(
2102 embed_url, video_id, 'Downloading embed webpage')
2103 jsplayer_url_json = self._search_regex(
2104 ASSETS_RE, embed_webpage, 'JS player URL')
2105
2106 player_url = json.loads(jsplayer_url_json)
2107 if player_url is None:
2108 player_url_json = self._search_regex(
2109 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2110 video_webpage, 'age gate player URL')
2111 player_url = json.loads(player_url_json)
2112
2113 if 'sig' in url_data:
2114 url += '&signature=' + url_data['sig'][0]
2115 elif 's' in url_data:
2116 encrypted_sig = url_data['s'][0]
2117
2118 if self._downloader.params.get('verbose'):
2119 if player_url is None:
2120 player_desc = 'unknown'
2121 else:
2122 player_type, player_version = self._extract_player_info(player_url)
2123 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2124 parts_sizes = self._signature_cache_id(encrypted_sig)
2125 self.to_screen('{%s} signature length %s, %s' %
2126 (format_id, parts_sizes, player_desc))
2127
2128 signature = self._decrypt_signature(
2129 encrypted_sig, video_id, player_url, age_gate)
2130 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2131 url += '&%s=%s' % (sp, signature)
2132 if 'ratebypass' not in url:
2133 url += '&ratebypass=yes'
2134
2135 dct = {
2136 'format_id': format_id,
2137 'url': url,
2138 'player_url': player_url,
2139 }
2140 if format_id in self._formats:
2141 dct.update(self._formats[format_id])
2142 if format_id in formats_spec:
2143 dct.update(formats_spec[format_id])
2144
2145 # Some itags are not included in DASH manifest thus corresponding formats will
2146 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2147 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2148 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2149 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2150
2151 if width is None:
2152 width = int_or_none(fmt.get('width'))
2153 if height is None:
2154 height = int_or_none(fmt.get('height'))
2155
2156 filesize = int_or_none(url_data.get(
2157 'clen', [None])[0]) or _extract_filesize(url)
2158
2159 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2160 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2161
2162 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2163 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2164 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2165
2166 more_fields = {
2167 'filesize': filesize,
2168 'tbr': tbr,
2169 'width': width,
2170 'height': height,
2171 'fps': fps,
2172 'format_note': quality_label or quality,
2173 }
2174 for key, value in more_fields.items():
2175 if value:
2176 dct[key] = value
2177 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2178 if type_:
2179 type_split = type_.split(';')
2180 kind_ext = type_split[0].split('/')
2181 if len(kind_ext) == 2:
2182 kind, _ = kind_ext
2183 dct['ext'] = mimetype2ext(type_split[0])
2184 if kind in ('audio', 'video'):
2185 codecs = None
2186 for mobj in re.finditer(
2187 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2188 if mobj.group('key') == 'codecs':
2189 codecs = mobj.group('val')
2190 break
2191 if codecs:
2192 dct.update(parse_codecs(codecs))
2193 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2194 dct['downloader_options'] = {
2195 # Youtube throttles chunks >~10M
2196 'http_chunk_size': 10485760,
2197 }
2198 formats.append(dct)
2199 else:
2200 manifest_url = (
2201 url_or_none(try_get(
2202 player_response,
2203 lambda x: x['streamingData']['hlsManifestUrl'],
2204 compat_str))
2205 or url_or_none(try_get(
2206 video_info, lambda x: x['hlsvp'][0], compat_str)))
2207 if manifest_url:
2208 formats = []
2209 m3u8_formats = self._extract_m3u8_formats(
2210 manifest_url, video_id, 'mp4', fatal=False)
2211 for a_format in m3u8_formats:
2212 itag = self._search_regex(
2213 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2214 if itag:
2215 a_format['format_id'] = itag
2216 if itag in self._formats:
2217 dct = self._formats[itag].copy()
2218 dct.update(a_format)
2219 a_format = dct
2220 a_format['player_url'] = player_url
2221 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2222 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2223 if self._downloader.params.get('youtube_include_hls_manifest', True):
2224 formats.append(a_format)
2225 else:
2226 error_message = extract_unavailable_message()
2227 if not error_message:
2228 error_message = clean_html(try_get(
2229 player_response, lambda x: x['playabilityStatus']['reason'],
2230 compat_str))
2231 if not error_message:
2232 error_message = clean_html(
2233 try_get(video_info, lambda x: x['reason'][0], compat_str))
2234 if error_message:
2235 raise ExtractorError(error_message, expected=True)
2236 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2237
2238 # uploader
2239 video_uploader = try_get(
2240 video_info, lambda x: x['author'][0],
2241 compat_str) or str_or_none(video_details.get('author'))
2242 if video_uploader:
2243 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2244 else:
2245 self._downloader.report_warning('unable to extract uploader name')
2246
2247 # uploader_id
2248 video_uploader_id = None
2249 video_uploader_url = None
2250 mobj = re.search(
2251 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2252 video_webpage)
2253 if mobj is not None:
2254 video_uploader_id = mobj.group('uploader_id')
2255 video_uploader_url = mobj.group('uploader_url')
2256 else:
2257 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2258 if owner_profile_url:
2259 video_uploader_id = self._search_regex(
2260 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2261 default=None)
2262 video_uploader_url = owner_profile_url
2263
2264 channel_id = (
2265 str_or_none(video_details.get('channelId'))
2266 or self._html_search_meta(
2267 'channelId', video_webpage, 'channel id', default=None)
2268 or self._search_regex(
2269 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2270 video_webpage, 'channel id', default=None, group='id'))
2271 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2272
2273 thumbnails = []
2274 thumbnails_list = try_get(
2275 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2276 for t in thumbnails_list:
2277 if not isinstance(t, dict):
2278 continue
2279 thumbnail_url = url_or_none(t.get('url'))
2280 if not thumbnail_url:
2281 continue
2282 thumbnails.append({
2283 'url': thumbnail_url,
2284 'width': int_or_none(t.get('width')),
2285 'height': int_or_none(t.get('height')),
2286 })
2287
2288 if not thumbnails:
2289 video_thumbnail = None
2290 # We try first to get a high quality image:
2291 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2292 video_webpage, re.DOTALL)
2293 if m_thumb is not None:
2294 video_thumbnail = m_thumb.group(1)
2295 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2296 if thumbnail_url:
2297 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2298 if video_thumbnail:
2299 thumbnails.append({'url': video_thumbnail})
2300
2301 # upload date
2302 upload_date = self._html_search_meta(
2303 'datePublished', video_webpage, 'upload date', default=None)
2304 if not upload_date:
2305 upload_date = self._search_regex(
2306 [r'(?s)id="eow-date.*?>(.*?)</span>',
2307 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2308 video_webpage, 'upload date', default=None)
2309 if not upload_date:
2310 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2311 upload_date = unified_strdate(upload_date)
2312
2313 video_license = self._html_search_regex(
2314 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2315 video_webpage, 'license', default=None)
2316
2317 m_music = re.search(
2318 r'''(?x)
2319 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2320 <ul[^>]*>\s*
2321 <li>(?P<title>.+?)
2322 by (?P<creator>.+?)
2323 (?:
2324 \(.+?\)|
2325 <a[^>]*
2326 (?:
2327 \bhref=["\']/red[^>]*>| # drop possible
2328 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2329 )
2330 .*?
2331 )?</li
2332 ''',
2333 video_webpage)
2334 if m_music:
2335 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2336 video_creator = clean_html(m_music.group('creator'))
2337 else:
2338 video_alt_title = video_creator = None
2339
2340 def extract_meta(field):
2341 return self._html_search_regex(
2342 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2343 video_webpage, field, default=None)
2344
2345 track = extract_meta('Song')
2346 artist = extract_meta('Artist')
2347 album = extract_meta('Album')
2348
2349 # Youtube Music Auto-generated description
2350 release_date = release_year = None
2351 if video_description:
2352 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2353 if mobj:
2354 if not track:
2355 track = mobj.group('track').strip()
2356 if not artist:
2357 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2358 if not album:
2359 album = mobj.group('album'.strip())
2360 release_year = mobj.group('release_year')
2361 release_date = mobj.group('release_date')
2362 if release_date:
2363 release_date = release_date.replace('-', '')
2364 if not release_year:
2365 release_year = int(release_date[:4])
2366 if release_year:
2367 release_year = int(release_year)
2368
2369 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2370 if yt_initial:
2371 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2372 if len(music_metadata):
2373 album = music_metadata[0].get('album')
2374 artist = music_metadata[0].get('artist')
2375 track = music_metadata[0].get('track')
2376
2377 m_episode = re.search(
2378 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2379 video_webpage)
2380 if m_episode:
2381 series = unescapeHTML(m_episode.group('series'))
2382 season_number = int(m_episode.group('season'))
2383 episode_number = int(m_episode.group('episode'))
2384 else:
2385 series = season_number = episode_number = None
2386
2387 m_cat_container = self._search_regex(
2388 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2389 video_webpage, 'categories', default=None)
2390 category = None
2391 if m_cat_container:
2392 category = self._html_search_regex(
2393 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2394 default=None)
2395 if not category:
2396 category = try_get(
2397 microformat, lambda x: x['category'], compat_str)
2398 video_categories = None if category is None else [category]
2399
2400 video_tags = [
2401 unescapeHTML(m.group('content'))
2402 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2403 if not video_tags:
2404 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2405
2406 def _extract_count(count_name):
2407 return str_to_int(self._search_regex(
2408 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
2409 % re.escape(count_name),
2410 video_webpage, count_name, default=None))
2411
2412 like_count = _extract_count('like')
2413 dislike_count = _extract_count('dislike')
2414
2415 if view_count is None:
2416 view_count = str_to_int(self._search_regex(
2417 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2418 'view count', default=None))
2419
2420 average_rating = (
2421 float_or_none(video_details.get('averageRating'))
2422 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2423
2424 # subtitles
2425 video_subtitles = self.extract_subtitles(
2426 video_id, video_webpage, has_live_chat_replay)
2427 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2428
2429 video_duration = try_get(
2430 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2431 if not video_duration:
2432 video_duration = int_or_none(video_details.get('lengthSeconds'))
2433 if not video_duration:
2434 video_duration = parse_duration(self._html_search_meta(
2435 'duration', video_webpage, 'video duration'))
2436
2437 # Get Subscriber Count of channel
2438 subscriber_count = parse_count(self._search_regex(
2439 r'"text":"([\d\.]+\w?) subscribers"',
2440 video_webpage,
2441 'subscriber count',
2442 default=None
2443 ))
2444
2445 # annotations
2446 video_annotations = None
2447 if self._downloader.params.get('writeannotations', False):
2448 xsrf_token = self._search_regex(
2449 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2450 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2451 invideo_url = try_get(
2452 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2453 if xsrf_token and invideo_url:
2454 xsrf_field_name = self._search_regex(
2455 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2456 video_webpage, 'xsrf field name',
2457 group='xsrf_field_name', default='session_token')
2458 video_annotations = self._download_webpage(
2459 self._proto_relative_url(invideo_url),
2460 video_id, note='Downloading annotations',
2461 errnote='Unable to download video annotations', fatal=False,
2462 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2463
2464 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2465
2466 # Look for the DASH manifest
2467 if self._downloader.params.get('youtube_include_dash_manifest', True):
2468 dash_mpd_fatal = True
2469 for mpd_url in dash_mpds:
2470 dash_formats = {}
2471 try:
2472 def decrypt_sig(mobj):
2473 s = mobj.group(1)
2474 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2475 return '/signature/%s' % dec_s
2476
2477 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2478
2479 for df in self._extract_mpd_formats(
2480 mpd_url, video_id, fatal=dash_mpd_fatal,
2481 formats_dict=self._formats):
2482 if not df.get('filesize'):
2483 df['filesize'] = _extract_filesize(df['url'])
2484 # Do not overwrite DASH format found in some previous DASH manifest
2485 if df['format_id'] not in dash_formats:
2486 dash_formats[df['format_id']] = df
2487 # Additional DASH manifests may end up in HTTP Error 403 therefore
2488 # allow them to fail without bug report message if we already have
2489 # some DASH manifest succeeded. This is temporary workaround to reduce
2490 # burst of bug reports until we figure out the reason and whether it
2491 # can be fixed at all.
2492 dash_mpd_fatal = False
2493 except (ExtractorError, KeyError) as e:
2494 self.report_warning(
2495 'Skipping DASH manifest: %r' % e, video_id)
2496 if dash_formats:
2497 # Remove the formats we found through non-DASH, they
2498 # contain less info and it can be wrong, because we use
2499 # fixed values (for example the resolution). See
2500 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2501 # example.
2502 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2503 formats.extend(dash_formats.values())
2504
2505 # Check for malformed aspect ratio
2506 stretched_m = re.search(
2507 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2508 video_webpage)
2509 if stretched_m:
2510 w = float(stretched_m.group('w'))
2511 h = float(stretched_m.group('h'))
2512 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2513 # We will only process correct ratios.
2514 if w > 0 and h > 0:
2515 ratio = w / h
2516 for f in formats:
2517 if f.get('vcodec') != 'none':
2518 f['stretched_ratio'] = ratio
2519
2520 if not formats:
2521 if 'reason' in video_info:
2522 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2523 regions_allowed = self._html_search_meta(
2524 'regionsAllowed', video_webpage, default=None)
2525 countries = regions_allowed.split(',') if regions_allowed else None
2526 self.raise_geo_restricted(
2527 msg=video_info['reason'][0], countries=countries)
2528 reason = video_info['reason'][0]
2529 if 'Invalid parameters' in reason:
2530 unavailable_message = extract_unavailable_message()
2531 if unavailable_message:
2532 reason = unavailable_message
2533 raise ExtractorError(
2534 'YouTube said: %s' % reason,
2535 expected=True, video_id=video_id)
2536 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2537 raise ExtractorError('This video is DRM protected.', expected=True)
2538
2539 self._sort_formats(formats)
2540
2541 self.mark_watched(video_id, video_info, player_response)
2542
2543 return {
2544 'id': video_id,
2545 'uploader': video_uploader,
2546 'uploader_id': video_uploader_id,
2547 'uploader_url': video_uploader_url,
2548 'channel_id': channel_id,
2549 'channel_url': channel_url,
2550 'upload_date': upload_date,
2551 'license': video_license,
2552 'creator': video_creator or artist,
2553 'title': video_title,
2554 'alt_title': video_alt_title or track,
2555 'thumbnails': thumbnails,
2556 'description': video_description,
2557 'categories': video_categories,
2558 'tags': video_tags,
2559 'subtitles': video_subtitles,
2560 'automatic_captions': automatic_captions,
2561 'duration': video_duration,
2562 'age_limit': 18 if age_gate else 0,
2563 'annotations': video_annotations,
2564 'chapters': chapters,
2565 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2566 'view_count': view_count,
2567 'like_count': like_count,
2568 'dislike_count': dislike_count,
2569 'average_rating': average_rating,
2570 'formats': formats,
2571 'is_live': is_live,
2572 'start_time': start_time,
2573 'end_time': end_time,
2574 'series': series,
2575 'season_number': season_number,
2576 'episode_number': episode_number,
2577 'track': track,
2578 'artist': artist,
2579 'album': album,
2580 'release_date': release_date,
2581 'release_year': release_year,
2582 'subscriber_count': subscriber_count,
2583 }
2584
2585
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Matches playlist/watch/embed URLs carrying a list/p/a parameter, bare
    # youtu.be short links with a list parameter, or a bare playlist id.
    # Group 1 captures ids from full URLs, group 2 bare playlist ids.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        # Log in up-front (if credentials are configured) so private
        # playlists become reachable.
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video_id, title) pairs from a playlist webpage.

        Tries the modern data-video-id attribute markup first, then falls
        back to progressively more relaxed regexes for older page layouts.
        Returns an iterator of (id, title) tuples; title may be None.
        """
        ids_in_page = []
        titles_in_page = []

        for item in re.findall(
                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
            attrs = extract_attributes(item)
            video_id = attrs['data-video-id']
            video_title = unescapeHTML(attrs.get('data-title'))
            if video_title:
                video_title = video_title.strip()
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)

        # Fallback with old _VIDEO_RE
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)

        # Relaxed fallbacks
        self.extract_videos_from_page_impl(
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)
        self.extract_videos_from_page_impl(
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix playlist.

        The mixes are generated from a single video: the playlist id is
        just 'RD' (or similar prefix) + video_id.  Watch pages are fetched
        repeatedly, seeding each request with the last id seen, until no
        new video ids appear.
        """
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # Title lives in different elements depending on page variant;
        # `webpage` is the last page fetched in the loop above.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title')
            or search_title('title long-title')
            or search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist page.

        Returns (has_videos, playlist_result) where has_videos is False
        when the URL does not actually serve a playlist.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """If url points at a specific video, honor --no-playlist.

        Returns (video_id, result): result is a url_result to the single
        video when --no-playlist is set, otherwise None; video_id is None
        when the URL carries no video id at all.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Dispatch to single-video, mix, or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
2947
2948
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Step aside for the more specific playlists/live extractors.
        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                else super(YoutubeChannelIE, cls).suitable(url))

    def _build_template_url(self, url, channel_id):
        # `url` is unused here but kept in the signature because
        # YoutubeUserIE's override needs it.
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's videos.

        Prefers redirecting to the channel's uploads playlist (UU...);
        otherwise falls back to paging through the /videos listing.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # The uploads playlist id is the channel id with UC -> UU.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # Empty channel: surface YouTube's alert message if present.
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3048
3049
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Our URL pattern is very permissive, so step aside whenever any
        # other Youtube* extractor defined in this module claims the URL.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Preserve the path kind from the URL (user/c); bare
        # youtube.com/<name> and ytuser: forms default to 'user'.
        mobj = re.match(self._VALID_URL, url)
        path_kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, mobj.group('id'))
3107
3108
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Resolve the /live page to the concrete stream video when one is
        # announced in the page metadata; otherwise hand the base channel
        # URL back to the generic channel/user extractors.
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)
        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video = page_type.startswith('video')
        if is_video and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3159
3160
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    # All extraction logic is inherited from YoutubePlaylistsBaseInfoExtractor;
    # this class only contributes the URL pattern, name and tests.
    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3193
3194
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Regex shared by the search extractors for pulling video ids (and
    # optional titles) out of search result pages.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3197
3198
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional base64 'params' blob appended to the API payload (set by
    # subclasses, e.g. YoutubeSearchDateIE, to change result ordering).
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n search results for query as url_transparent dicts.

        Pages through the InnerTube /youtubei/v1/search endpoint, feeding
        each response's continuation token back into the next request until
        n results are produced or no further page is available.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page and continuation pages nest the result list
            # under different keys; try both shapes.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                # Skip non-video entries (channels, playlists, ads, shelves).
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            # Continuation token for the next page, if any.
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3287
3288
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Search 'params' blob selecting newest-first ordering (per IE_DESC);
    # URL-encoded base64 ('CAI=').
    _SEARCH_PARAMS = 'CAI%3D'
3294
3295
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _find_videos_in_json(self, extracted):
        """Depth-first walk of parsed ytInitialData, returning every dict
        that carries a 'videoId' key (i.e. video renderer objects)."""
        videos = []

        def _real_find(obj):
            # Strings are leaves.  Use compat_str (not str) so that on
            # Python 2 the unicode strings produced by JSON parsing are
            # short-circuited here too, consistent with the rest of this
            # module; on Python 3 compat_str is str, so this is unchanged.
            if obj is None or isinstance(obj, compat_str):
                return

            if type(obj) is list:
                for elem in obj:
                    _real_find(elem)

            if type(obj) is dict:
                if "videoId" in obj:
                    videos.append(obj)
                    return

                # Recurse into values; keys are irrelevant here.
                for _, o in obj.items():
                    _real_find(o)

        _real_find(extracted)

        return videos

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Parse ytInitialData out of `page` and append video ids/titles
        to the given lists in place, de-duplicating by video id."""
        search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)

        result_items = self._find_videos_in_json(search_response)

        for renderer in result_items:
            video_id = try_get(renderer, lambda x: x['videoId'])
            video_title = try_get(renderer, lambda x: x['title']['runs'][0]['text']) or try_get(renderer, lambda x: x['title']['simpleText'])

            if video_id is None or video_title is None:
                # we do not have a videoRenderer or title extraction broke
                continue

            video_title = video_title.strip()

            try:
                idx = ids_in_page.index(video_id)
                # Duplicate id: keep the first entry but backfill a missing title.
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return an iterator of (video_id, title) pairs found in `page`."""
        ids_in_page = []
        titles_in_page = []
        self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
3369
3370
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for youtube.com/show pages (multi-season shows)."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is handled as the collection of its playlists, so defer
        # to the playlists base extractor pointed at the /playlists tab.
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3388
3389
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # ytInitialData is embedded either as a plain assignment or via
    # window["ytInitialData"]
    _FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _YTCFG_DATA = r"ytcfg.set\(({.*?})\)"

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _find_videos_in_json(self, extracted):
        """Recursively walk the parsed feed JSON and return a tuple of
        (video dicts, continuation data): every dict carrying a 'videoId'
        key is collected, and the last 'nextContinuationData' object seen
        is kept for pagination (None if there is no further page).
        """
        videos = []
        c = {}

        def _real_find(obj):
            # Skip None and text. On Python 2 JSON strings are unicode
            # (compat_str), not str, so both text types must be tested or
            # the guard silently fails there.
            if obj is None or isinstance(obj, (compat_str, str)):
                return

            if isinstance(obj, list):
                for elem in obj:
                    _real_find(elem)

            if isinstance(obj, dict):
                if 'videoId' in obj:
                    videos.append(obj)
                    return

                if 'nextContinuationData' in obj:
                    c['continuation'] = obj['nextContinuationData']
                    return

                for o in obj.values():
                    _real_find(o)

        _real_find(extracted)

        return videos, c.get('continuation')

    def _entries(self, page):
        """Yield url_result entries for every video in the feed, following
        browse_ajax continuations until no new videos appear.
        """
        # Track already-yielded ids in a set: the previous linear scan over
        # all earlier videos made deduplication O(n^2) over the whole feed.
        seen_ids = set()

        # ytcfg carries the client identification headers required by the
        # continuation endpoint; without it pagination is not attempted.
        yt_conf = self._parse_json(self._search_regex(
            self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)

        search_response = self._parse_json(
            self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)

        for page_num in itertools.count(1):
            video_info, continuation = self._find_videos_in_json(search_response)

            new_info = []
            for v in video_info:
                v_id = try_get(v, lambda x: x['videoId'])
                if not v_id or v_id in seen_ids:
                    continue
                seen_ids.add(v_id)
                new_info.append(v)

            # A page with no unseen videos means the feed is exhausted
            if not new_info:
                break

            for video in new_info:
                video_id = try_get(video, lambda x: x['videoId'])
                # title is delivered either as rich-text runs or simpleText
                video_title = (
                    try_get(video, lambda x: x['title']['runs'][0]['text'])
                    or try_get(video, lambda x: x['title']['simpleText']))
                yield self.url_result(video_id, YoutubeIE.ie_key(), video_title=video_title)

            if not continuation or not yt_conf:
                break

            search_response = self._download_json(
                'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                query={
                    "ctoken": try_get(continuation, lambda x: x["continuation"]),
                    "continuation": try_get(continuation, lambda x: x["continuation"]),
                    "itct": try_get(continuation, lambda x: x["clickTrackingParams"])
                },
                headers={
                    "X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
                    "X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
                    "X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
                    "X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
                    "X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
                    "X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
                    "X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
                })

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3496
3497
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the authenticated user's Watch Later ('WL') list."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch?v=...&list=WL URL may resolve to just the single video
        _, maybe_video = self._check_download_just_video(url, 'WL')
        if maybe_video:
            return maybe_video
        # Otherwise extract the whole Watch Later playlist
        return self._extract_playlist('WL')[1]
3517
3518
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the authenticated user's favourites playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The my_favorites page links to the real playlist; scrape its id
        # and hand off to the playlist extractor.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
3529
3530
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommendations."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3536
3537
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3543
3544
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's watch history."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3550
3551
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch-all for watch URLs whose video id was eaten by the shell
    (e.g. an unquoted '&'); always raises a helpful error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # There is nothing to download here; the URL matched only because
        # the v= parameter is missing, so tell the user what went wrong.
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
3599
3600
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch-all for watch URLs whose 11-character video id was cut short;
    always raises a helpful error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # An id shorter than 11 characters can never be valid, so report
        # the truncation instead of attempting a download.
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)