]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/youtube.py
Merge remote-tracking branch 'origin/master'
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 mimetype2ext,
40 orderedSet,
41 parse_codecs,
42 parse_count,
43 parse_duration,
44 remove_quotes,
45 remove_start,
46 smuggle_url,
47 str_or_none,
48 str_to_int,
49 try_get,
50 unescapeHTML,
51 unified_strdate,
52 unsmuggle_url,
53 uppercase_escape,
54 url_or_none,
55 urlencode_postdata,
56 )
57
58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Entry points of the (legacy) Google account web sign-in flow.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist-ID prefixes followed by at least 10 ID characters.
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        # Pin preferences via the PREF cookie (hl=en selects the English UI)
        # so scraped pages have a predictable language/layout.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Wrap plain video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow. The response body is JSON
            # preceded by an anti-XSSI prefix; transform_source strips
            # everything before the first '['.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The payloads below follow an opaque positional protocol
        # reverse-engineered from the Google web sign-in flow; do not
        # reorder their elements.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Conditional is parenthesized so the 'Unable to login: ' prefix
            # is kept for every message; previously the ternary bound looser
            # than %, dropping the prefix for non-INCORRECT_ANSWER_ENTERED
            # messages.
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users sometimes paste codes with the SMS 'G-' prefix.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesized for the same precedence reason as the
                    # login warning above: always keep the prefix.
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Add disable_polymer=true to every request's query string —
        # presumably to get the legacy (pre-Polymer) markup that the
        # scraping regexes in this file expect; confirm before removing.
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob from *webpage*.

        Returns the parsed object, or None when the blob is absent or
        cannot be parsed (_parse_json is called with fatal=False).
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        # Set the language cookie and try to log in; a failed login is not
        # fatal here (both branches simply return).
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
297
298
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Shared pagination logic for legacy pages that use a "Load more" button.
    def _entries(self, page, playlist_id):
        """Yield entries from *page*, following "Load more" continuations."""
        widget_html = content_html = page
        for page_num in itertools.count(1):
            # Emit everything found in the current chunk of HTML.
            for entry in self._process_page(content_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            # Downloading a continuation page may hit an intermittent 5xx
            # HTTP error that usually goes away on retry.
            attempt, max_retries = 0, 3
            while True:
                try:
                    more = self._download_json(
                        'https://www.youtube.com/%s' % load_more.group('more'), playlist_id,
                        'Downloading page #%s%s'
                        % (page_num, ' (retry #%d)' % attempt if attempt else ''),
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                    break
                except ExtractorError as e:
                    retryable = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if retryable:
                        attempt += 1
                        if attempt <= max_retries:
                            continue
                    raise

            content_html = more['content_html']
            if not content_html.strip():
                # A "Load more" button can be present even when there are
                # no further videos.
                return
            widget_html = more['load_more_widget_html']
337
338
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Turn every (id, title) pair found in *content* into a url_result."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Scan *page* with *video_re*, accumulating de-duplicated video IDs
        and their titles into the two parallel output lists (mutated in place).
        """
        for match in re.finditer(video_re, page):
            groups = match.groupdict()
            # The link with index 0 is not the first video of the playlist
            # (not sure if still actual).
            # NOTE(review): the guard compares the 'id' group against '0'
            # although the comment talks about the index — confirm whether
            # 'index' was intended here.
            if 'index' in groups and match.group('id') == '0':
                continue
            vid = match.group('id')
            title = unescapeHTML(match.group('title')) if 'title' in groups else None
            if title:
                title = title.strip()
            if title == '► Play all':
                # The "Play all" pseudo-entry carries no real video title.
                title = None
            if vid in ids_in_page:
                # Already seen: keep the first non-empty title we find.
                pos = ids_in_page.index(vid)
                if title and not titles_in_page[pos]:
                    titles_in_page[pos] = title
            else:
                ids_in_page.append(vid)
                titles_in_page.append(title)

    def extract_videos_from_page(self, page):
        """Return an iterator of (video_id, title) pairs scraped from *page*."""
        ids, titles = [], []
        self.extract_videos_from_page_impl(self._VIDEO_RE, page, ids, titles)
        return zip(ids, titles)
370
371
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a url_result for every playlist linked from *content*."""
        found = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet drops duplicates while preserving first-seen order.
        for pl_id in orderedSet(found):
            pl_url = 'https://www.youtube.com/playlist?list=%s' % pl_id
            yield self.url_result(pl_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download *url* and return a playlist of all playlists found on it."""
        pl_id = self._match_id(url)
        webpage = self._download_webpage(url, pl_id)
        page_title = self._og_search_title(webpage, fatal=False)
        return self.playlist_result(self._entries(webpage, pl_id), pl_id, page_title)
385
386
387 class YoutubeIE(YoutubeBaseInfoExtractor):
388 IE_DESC = 'YouTube.com'
389 _VALID_URL = r"""(?x)^
390 (
391 (?:https?://|//) # http(s):// or protocol-independent URL
392 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
393 (?:www\.)?deturl\.com/www\.youtube\.com/|
394 (?:www\.)?pwnyoutube\.com/|
395 (?:www\.)?hooktube\.com/|
396 (?:www\.)?yourepeat\.com/|
397 tube\.majestyc\.net/|
398 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
399 (?:(?:www|dev)\.)?invidio\.us/|
400 (?:(?:www|no)\.)?invidiou\.sh/|
401 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
402 (?:www\.)?invidious\.kabi\.tk/|
403 (?:www\.)?invidious\.13ad\.de/|
404 (?:www\.)?invidious\.mastodon\.host/|
405 (?:www\.)?invidious\.nixnet\.xyz/|
406 (?:www\.)?invidious\.drycat\.fr/|
407 (?:www\.)?tube\.poal\.co/|
408 (?:www\.)?vid\.wxzm\.sx/|
409 (?:www\.)?yewtu\.be/|
410 (?:www\.)?yt\.elukerio\.org/|
411 (?:www\.)?yt\.lelux\.fi/|
412 (?:www\.)?invidious\.ggc-project\.de/|
413 (?:www\.)?yt\.maisputain\.ovh/|
414 (?:www\.)?invidious\.13ad\.de/|
415 (?:www\.)?invidious\.toot\.koeln/|
416 (?:www\.)?invidious\.fdn\.fr/|
417 (?:www\.)?watch\.nettohikari\.com/|
418 (?:www\.)?kgg2m7yk5aybusll\.onion/|
419 (?:www\.)?qklhadlycap4cnod\.onion/|
420 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
421 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
422 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
423 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
424 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
425 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
426 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
427 (?:.*?\#/)? # handle anchor (#/) redirect urls
428 (?: # the various things that can precede the ID:
429 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
430 |(?: # or the v= param in all its forms
431 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
432 (?:\?|\#!?) # the params delimiter ? or # or #!
433 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
434 v=
435 )
436 ))
437 |(?:
438 youtu\.be| # just youtu.be/xxxx
439 vid\.plus| # or vid.plus/xxxx
440 zwearz\.com/watch| # or zwearz.com/watch/xxxx
441 )/
442 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
443 )
444 )? # all until now is optional -> you can pass the naked ID
445 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
446 (?!.*?\blist=
447 (?:
448 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
449 WL # WL are handled by the watch later IE
450 )
451 )
452 (?(1).+)? # if we found the ID, everything can follow
453 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
454 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
455 _PLAYER_INFO_RE = (
456 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
457 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
458 )
459 _formats = {
460 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
461 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
462 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
463 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
464 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
465 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
466 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
467 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
468 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
469 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
470 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
471 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
472 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
473 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
474 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
475 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
476 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
477 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
478
479
480 # 3D videos
481 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
482 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
483 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
484 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
485 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
486 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
487 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
488
489 # Apple HTTP Live Streaming
490 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
491 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
492 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
493 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
494 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
495 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
496 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
497 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
498
499 # DASH mp4 video
500 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
501 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
502 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
503 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
504 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
505 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
506 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
507 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
508 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
509 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
510 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
511 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
512
513 # Dash mp4 audio
514 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
515 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
516 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
517 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
518 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
519 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
520 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
521
522 # Dash webm
523 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
524 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
525 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
526 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
527 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
528 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
529 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
530 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
534 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
536 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
537 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
538 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
539 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
540 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
541 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
542 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
543 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
544 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
545
546 # Dash webm audio
547 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
548 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
549
550 # Dash webm audio with opus inside
551 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
552 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
553 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
554
555 # RTMP (unnamed)
556 '_rtmp': {'protocol': 'rtmp'},
557
558 # av01 video only formats sometimes served with "unknown" codecs
559 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
560 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
561 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
562 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
563 }
564 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
565
566 _GEO_BYPASS = False
567
568 IE_NAME = 'youtube'
569 _TESTS = [
570 {
571 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
572 'info_dict': {
573 'id': 'BaW_jenozKc',
574 'ext': 'mp4',
575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
576 'uploader': 'Philipp Hagemeister',
577 'uploader_id': 'phihag',
578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
579 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
580 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
581 'upload_date': '20121002',
582 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
583 'categories': ['Science & Technology'],
584 'tags': ['youtube-dl'],
585 'duration': 10,
586 'view_count': int,
587 'like_count': int,
588 'dislike_count': int,
589 'start_time': 1,
590 'end_time': 9,
591 }
592 },
593 {
594 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
595 'note': 'Embed-only video (#1746)',
596 'info_dict': {
597 'id': 'yZIXLfi8CZQ',
598 'ext': 'mp4',
599 'upload_date': '20120608',
600 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
601 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
602 'uploader': 'SET India',
603 'uploader_id': 'setindia',
604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
605 'age_limit': 18,
606 }
607 },
608 {
609 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
610 'note': 'Use the first video ID in the URL',
611 'info_dict': {
612 'id': 'BaW_jenozKc',
613 'ext': 'mp4',
614 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
615 'uploader': 'Philipp Hagemeister',
616 'uploader_id': 'phihag',
617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
618 'upload_date': '20121002',
619 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
620 'categories': ['Science & Technology'],
621 'tags': ['youtube-dl'],
622 'duration': 10,
623 'view_count': int,
624 'like_count': int,
625 'dislike_count': int,
626 },
627 'params': {
628 'skip_download': True,
629 },
630 },
631 {
632 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
633 'note': '256k DASH audio (format 141) via DASH manifest',
634 'info_dict': {
635 'id': 'a9LDPn-MO4I',
636 'ext': 'm4a',
637 'upload_date': '20121002',
638 'uploader_id': '8KVIDEO',
639 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
640 'description': '',
641 'uploader': '8KVIDEO',
642 'title': 'UHDTV TEST 8K VIDEO.mp4'
643 },
644 'params': {
645 'youtube_include_dash_manifest': True,
646 'format': '141',
647 },
648 'skip': 'format 141 not served anymore',
649 },
650 # Controversy video
651 {
652 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
653 'info_dict': {
654 'id': 'T4XJQO3qol8',
655 'ext': 'mp4',
656 'duration': 219,
657 'upload_date': '20100909',
658 'uploader': 'Amazing Atheist',
659 'uploader_id': 'TheAmazingAtheist',
660 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
661 'title': 'Burning Everyone\'s Koran',
662 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
663 }
664 },
665 # Normal age-gate video (embed allowed)
666 {
667 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
668 'info_dict': {
669 'id': 'HtVdAasjOgU',
670 'ext': 'mp4',
671 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
672 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
673 'duration': 142,
674 'uploader': 'The Witcher',
675 'uploader_id': 'WitcherGame',
676 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
677 'upload_date': '20140605',
678 'age_limit': 18,
679 },
680 },
681 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
682 {
683 'url': 'lqQg6PlCWgI',
684 'info_dict': {
685 'id': 'lqQg6PlCWgI',
686 'ext': 'mp4',
687 'duration': 6085,
688 'upload_date': '20150827',
689 'uploader_id': 'olympic',
690 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
691 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
692 'uploader': 'Olympic',
693 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
694 },
695 'params': {
696 'skip_download': 'requires avconv',
697 }
698 },
699 # Non-square pixels
700 {
701 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
702 'info_dict': {
703 'id': '_b-2C3KPAM0',
704 'ext': 'mp4',
705 'stretched_ratio': 16 / 9.,
706 'duration': 85,
707 'upload_date': '20110310',
708 'uploader_id': 'AllenMeow',
709 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
710 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
711 'uploader': '孫ᄋᄅ',
712 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
713 },
714 },
715 # url_encoded_fmt_stream_map is empty string
716 {
717 'url': 'qEJwOuvDf7I',
718 'info_dict': {
719 'id': 'qEJwOuvDf7I',
720 'ext': 'webm',
721 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
722 'description': '',
723 'upload_date': '20150404',
724 'uploader_id': 'spbelect',
725 'uploader': 'Наблюдатели Петербурга',
726 },
727 'params': {
728 'skip_download': 'requires avconv',
729 },
730 'skip': 'This live event has ended.',
731 },
732 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
733 {
734 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
735 'info_dict': {
736 'id': 'FIl7x6_3R5Y',
737 'ext': 'webm',
738 'title': 'md5:7b81415841e02ecd4313668cde88737a',
739 'description': 'md5:116377fd2963b81ec4ce64b542173306',
740 'duration': 220,
741 'upload_date': '20150625',
742 'uploader_id': 'dorappi2000',
743 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
744 'uploader': 'dorappi2000',
745 'formats': 'mincount:31',
746 },
747 'skip': 'not actual anymore',
748 },
749 # DASH manifest with segment_list
750 {
751 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
752 'md5': '8ce563a1d667b599d21064e982ab9e31',
753 'info_dict': {
754 'id': 'CsmdDsKjzN8',
755 'ext': 'mp4',
756 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
757 'uploader': 'Airtek',
758 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
759 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
760 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
761 },
762 'params': {
763 'youtube_include_dash_manifest': True,
764 'format': '135', # bestvideo
765 },
766 'skip': 'This live event has ended.',
767 },
768 {
769 # Multifeed videos (multiple cameras), URL is for Main Camera
770 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
771 'info_dict': {
772 'id': 'jqWvoWXjCVs',
773 'title': 'teamPGP: Rocket League Noob Stream',
774 'description': 'md5:dc7872fb300e143831327f1bae3af010',
775 },
776 'playlist': [{
777 'info_dict': {
778 'id': 'jqWvoWXjCVs',
779 'ext': 'mp4',
780 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
781 'description': 'md5:dc7872fb300e143831327f1bae3af010',
782 'duration': 7335,
783 'upload_date': '20150721',
784 'uploader': 'Beer Games Beer',
785 'uploader_id': 'beergamesbeer',
786 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
787 'license': 'Standard YouTube License',
788 },
789 }, {
790 'info_dict': {
791 'id': '6h8e8xoXJzg',
792 'ext': 'mp4',
793 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
794 'description': 'md5:dc7872fb300e143831327f1bae3af010',
795 'duration': 7337,
796 'upload_date': '20150721',
797 'uploader': 'Beer Games Beer',
798 'uploader_id': 'beergamesbeer',
799 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
800 'license': 'Standard YouTube License',
801 },
802 }, {
803 'info_dict': {
804 'id': 'PUOgX5z9xZw',
805 'ext': 'mp4',
806 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
807 'description': 'md5:dc7872fb300e143831327f1bae3af010',
808 'duration': 7337,
809 'upload_date': '20150721',
810 'uploader': 'Beer Games Beer',
811 'uploader_id': 'beergamesbeer',
812 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
813 'license': 'Standard YouTube License',
814 },
815 }, {
816 'info_dict': {
817 'id': 'teuwxikvS5k',
818 'ext': 'mp4',
819 'title': 'teamPGP: Rocket League Noob Stream (zim)',
820 'description': 'md5:dc7872fb300e143831327f1bae3af010',
821 'duration': 7334,
822 'upload_date': '20150721',
823 'uploader': 'Beer Games Beer',
824 'uploader_id': 'beergamesbeer',
825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
826 'license': 'Standard YouTube License',
827 },
828 }],
829 'params': {
830 'skip_download': True,
831 },
832 'skip': 'This video is not available.',
833 },
834 {
835 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
836 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
837 'info_dict': {
838 'id': 'gVfLd0zydlo',
839 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
840 },
841 'playlist_count': 2,
842 'skip': 'Not multifeed anymore',
843 },
844 {
845 'url': 'https://vid.plus/FlRa-iH7PGw',
846 'only_matching': True,
847 },
848 {
849 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
850 'only_matching': True,
851 },
852 {
853 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
854 # Also tests cut-off URL expansion in video description (see
855 # https://github.com/ytdl-org/youtube-dl/issues/1892,
856 # https://github.com/ytdl-org/youtube-dl/issues/8164)
857 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
858 'info_dict': {
859 'id': 'lsguqyKfVQg',
860 'ext': 'mp4',
861 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
862 'alt_title': 'Dark Walk - Position Music',
863 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
864 'duration': 133,
865 'upload_date': '20151119',
866 'uploader_id': 'IronSoulElf',
867 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
868 'uploader': 'IronSoulElf',
869 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
870 'track': 'Dark Walk - Position Music',
871 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
872 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
873 },
874 'params': {
875 'skip_download': True,
876 },
877 },
878 {
879 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
880 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
881 'only_matching': True,
882 },
883 {
884 # Video with yt:stretch=17:0
885 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
886 'info_dict': {
887 'id': 'Q39EVAstoRM',
888 'ext': 'mp4',
889 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
890 'description': 'md5:ee18a25c350637c8faff806845bddee9',
891 'upload_date': '20151107',
892 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
893 'uploader': 'CH GAMER DROID',
894 },
895 'params': {
896 'skip_download': True,
897 },
898 'skip': 'This video does not exist.',
899 },
900 {
901 # Video licensed under Creative Commons
902 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
903 'info_dict': {
904 'id': 'M4gD1WSo5mA',
905 'ext': 'mp4',
906 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
907 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
908 'duration': 721,
909 'upload_date': '20150127',
910 'uploader_id': 'BerkmanCenter',
911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
912 'uploader': 'The Berkman Klein Center for Internet & Society',
913 'license': 'Creative Commons Attribution license (reuse allowed)',
914 },
915 'params': {
916 'skip_download': True,
917 },
918 },
919 {
920 # Channel-like uploader_url
921 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
922 'info_dict': {
923 'id': 'eQcmzGIKrzg',
924 'ext': 'mp4',
925 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
926 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
927 'duration': 4060,
928 'upload_date': '20151119',
929 'uploader': 'Bernie Sanders',
930 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
931 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
932 'license': 'Creative Commons Attribution license (reuse allowed)',
933 },
934 'params': {
935 'skip_download': True,
936 },
937 },
938 {
939 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
940 'only_matching': True,
941 },
942 {
943 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
944 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
945 'only_matching': True,
946 },
947 {
948 # Rental video preview
949 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
950 'info_dict': {
951 'id': 'uGpuVWrhIzE',
952 'ext': 'mp4',
953 'title': 'Piku - Trailer',
954 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
955 'upload_date': '20150811',
956 'uploader': 'FlixMatrix',
957 'uploader_id': 'FlixMatrixKaravan',
958 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
959 'license': 'Standard YouTube License',
960 },
961 'params': {
962 'skip_download': True,
963 },
964 'skip': 'This video is not available.',
965 },
966 {
967 # YouTube Red video with episode data
968 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
969 'info_dict': {
970 'id': 'iqKdEhx-dD4',
971 'ext': 'mp4',
972 'title': 'Isolation - Mind Field (Ep 1)',
973 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
974 'duration': 2085,
975 'upload_date': '20170118',
976 'uploader': 'Vsauce',
977 'uploader_id': 'Vsauce',
978 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
979 'series': 'Mind Field',
980 'season_number': 1,
981 'episode_number': 1,
982 },
983 'params': {
984 'skip_download': True,
985 },
986 'expected_warnings': [
987 'Skipping DASH manifest',
988 ],
989 },
990 {
991 # The following content has been identified by the YouTube community
992 # as inappropriate or offensive to some audiences.
993 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
994 'info_dict': {
995 'id': '6SJNVb0GnPI',
996 'ext': 'mp4',
997 'title': 'Race Differences in Intelligence',
998 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
999 'duration': 965,
1000 'upload_date': '20140124',
1001 'uploader': 'New Century Foundation',
1002 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1003 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1004 },
1005 'params': {
1006 'skip_download': True,
1007 },
1008 },
1009 {
1010 # itag 212
1011 'url': '1t24XAntNCY',
1012 'only_matching': True,
1013 },
1014 {
1015 # geo restricted to JP
1016 'url': 'sJL6WA-aGkQ',
1017 'only_matching': True,
1018 },
1019 {
1020 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1021 'only_matching': True,
1022 },
1023 {
1024 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1025 'only_matching': True,
1026 },
1027 {
1028 # DRM protected
1029 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1030 'only_matching': True,
1031 },
1032 {
1033 # Video with unsupported adaptive stream type formats
1034 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1035 'info_dict': {
1036 'id': 'Z4Vy8R84T1U',
1037 'ext': 'mp4',
1038 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1039 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1040 'duration': 433,
1041 'upload_date': '20130923',
1042 'uploader': 'Amelia Putri Harwita',
1043 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1044 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1045 'formats': 'maxcount:10',
1046 },
1047 'params': {
1048 'skip_download': True,
1049 'youtube_include_dash_manifest': False,
1050 },
1051 'skip': 'not actual anymore',
1052 },
1053 {
1054 # Youtube Music Auto-generated description
1055 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1056 'info_dict': {
1057 'id': 'MgNrAu2pzNs',
1058 'ext': 'mp4',
1059 'title': 'Voyeur Girl',
1060 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1061 'upload_date': '20190312',
1062 'uploader': 'Stephen - Topic',
1063 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1064 'artist': 'Stephen',
1065 'track': 'Voyeur Girl',
1066 'album': 'it\'s too much love to know my dear',
1067 'release_date': '20190313',
1068 'release_year': 2019,
1069 },
1070 'params': {
1071 'skip_download': True,
1072 },
1073 },
1074 {
1075 # Youtube Music Auto-generated description
1076 # Retrieve 'artist' field from 'Artist:' in video description
1077 # when it is present on youtube music video
1078 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1079 'info_dict': {
1080 'id': 'k0jLE7tTwjY',
1081 'ext': 'mp4',
1082 'title': 'Latch Feat. Sam Smith',
1083 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1084 'upload_date': '20150110',
1085 'uploader': 'Various Artists - Topic',
1086 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1087 'artist': 'Disclosure',
1088 'track': 'Latch Feat. Sam Smith',
1089 'album': 'Latch Featuring Sam Smith',
1090 'release_date': '20121008',
1091 'release_year': 2012,
1092 },
1093 'params': {
1094 'skip_download': True,
1095 },
1096 },
1097 {
1098 # Youtube Music Auto-generated description
1099 # handle multiple artists on youtube music video
1100 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1101 'info_dict': {
1102 'id': '74qn0eJSjpA',
1103 'ext': 'mp4',
1104 'title': 'Eastside',
1105 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1106 'upload_date': '20180710',
1107 'uploader': 'Benny Blanco - Topic',
1108 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1109 'artist': 'benny blanco, Halsey, Khalid',
1110 'track': 'Eastside',
1111 'album': 'Eastside',
1112 'release_date': '20180713',
1113 'release_year': 2018,
1114 },
1115 'params': {
1116 'skip_download': True,
1117 },
1118 },
1119 {
1120 # Youtube Music Auto-generated description
1121 # handle youtube music video with release_year and no release_date
1122 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1123 'info_dict': {
1124 'id': '-hcAI0g-f5M',
1125 'ext': 'mp4',
1126 'title': 'Put It On Me',
1127 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1128 'upload_date': '20180426',
1129 'uploader': 'Matt Maeson - Topic',
1130 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1131 'artist': 'Matt Maeson',
1132 'track': 'Put It On Me',
1133 'album': 'The Hearse',
1134 'release_date': None,
1135 'release_year': 2018,
1136 },
1137 'params': {
1138 'skip_download': True,
1139 },
1140 },
1141 {
1142 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1143 'only_matching': True,
1144 },
1145 {
1146 # invalid -> valid video id redirection
1147 'url': 'DJztXj2GPfl',
1148 'info_dict': {
1149 'id': 'DJztXj2GPfk',
1150 'ext': 'mp4',
1151 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1152 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1153 'upload_date': '20090125',
1154 'uploader': 'Prochorowka',
1155 'uploader_id': 'Prochorowka',
1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1157 'artist': 'Panjabi MC',
1158 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1159 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1160 },
1161 'params': {
1162 'skip_download': True,
1163 },
1164 },
1165 {
1166 # empty description results in an empty string
1167 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1168 'info_dict': {
1169 'id': 'x41yOUIvK2k',
1170 'ext': 'mp4',
1171 'title': 'IMG 3456',
1172 'description': '',
1173 'upload_date': '20170613',
1174 'uploader_id': 'ElevageOrVert',
1175 'uploader': 'ElevageOrVert',
1176 },
1177 'params': {
1178 'skip_download': True,
1179 },
1180 },
1181 ]
1182
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and its per-instance signature cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature_cache_id) -> deciphering function, so a
        # given player version is only downloaded and parsed once per run.
        self._player_cache = {}
1186
1187 def report_video_info_webpage_download(self, video_id):
1188 """Report attempt to download video info webpage."""
1189 self.to_screen('%s: Downloading video info webpage' % video_id)
1190
1191 def report_information_extraction(self, video_id):
1192 """Report attempt to extract video information."""
1193 self.to_screen('%s: Extracting video information' % video_id)
1194
1195 def report_unavailable_format(self, video_id, format):
1196 """Report extracted video URL."""
1197 self.to_screen('%s: Format %s not available' % (video_id, format))
1198
1199 def report_rtmp_download(self):
1200 """Indicate the download will use the RTMP protocol."""
1201 self.to_screen('RTMP download detected')
1202
1203 def _signature_cache_id(self, example_sig):
1204 """ Return a string representation of a signature """
1205 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1206
1207 @classmethod
1208 def _extract_player_info(cls, player_url):
1209 for player_re in cls._PLAYER_INFO_RE:
1210 id_m = re.search(player_re, player_url)
1211 if id_m:
1212 break
1213 else:
1214 raise ExtractorError('Cannot identify player %r' % player_url)
1215 return id_m.group('ext'), id_m.group('id')
1216
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a function deciphering signatures shaped like example_sig.

        The routine is extracted from the JS or SWF player at player_url and
        memoized on disk keyed by player version and signature layout, so each
        player is only downloaded and parsed once across runs.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id becomes part of a cache filename; make sure it cannot
        # escape the cache directory via path separators.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached form is a list of source indices: deciphering is a
            # pure reordering/selection of the input characters.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Probe the extracted function with a string of distinct characters to
        # learn which input position each output character comes from, then
        # persist that permutation for future runs.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1256
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Used by the youtube_print_sig_code option: probes func with a string
        of distinct characters, recovers the output-to-input index mapping and
        compresses runs of consecutive indices into slice expressions.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting parts that match
                # Python's slice defaults.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a +1/-1 run: extend it, or emit the
                    # finished slice when the run breaks.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of an ascending/descending run of indices.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush whatever the final element belongs to.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1295
    def _parse_sig_js(self, jscode):
        """Locate the signature function in player JS and return a wrapper.

        Patterns are ordered newest-first; each captures the function name in
        the 'sig' group. The returned callable maps a scrambled signature
        string to its deciphered form via the JS interpreter.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its argument as a single-element arg list.
        return lambda s: initial_function([s])
1316
1317 def _parse_sig_swf(self, file_contents):
1318 swfi = SWFInterpreter(file_contents)
1319 TARGET_CLASSNAME = 'SignatureDecipher'
1320 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1321 initial_function = swfi.extract_function(searched_class, 'decipher')
1322 return lambda s: initial_function([s])
1323
1324 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1325 """Turn the encrypted s field into a working signature"""
1326
1327 if player_url is None:
1328 raise ExtractorError('Cannot decrypt signature without player_url')
1329
1330 if player_url.startswith('//'):
1331 player_url = 'https:' + player_url
1332 elif not re.match(r'https?://', player_url):
1333 player_url = compat_urlparse.urljoin(
1334 'https://www.youtube.com', player_url)
1335 try:
1336 player_id = (player_url, self._signature_cache_id(s))
1337 if player_id not in self._player_cache:
1338 func = self._extract_signature_function(
1339 video_id, player_url, s
1340 )
1341 self._player_cache[player_id] = func
1342 func = self._player_cache[player_id]
1343 if self._downloader.params.get('youtube_print_sig_code'):
1344 self._print_sig_code(func, s)
1345 return func(s)
1346 except Exception as e:
1347 tb = traceback.format_exc()
1348 raise ExtractorError(
1349 'Signature extraction failed: ' + tb, cause=e)
1350
1351 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1352 try:
1353 subs_doc = self._download_xml(
1354 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1355 video_id, note=False)
1356 except ExtractorError as err:
1357 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1358 return {}
1359
1360 sub_lang_list = {}
1361 for track in subs_doc.findall('track'):
1362 lang = track.attrib['lang_code']
1363 if lang in sub_lang_list:
1364 continue
1365 sub_formats = []
1366 for ext in self._SUBTITLE_FORMATS:
1367 params = compat_urllib_parse_urlencode({
1368 'lang': lang,
1369 'v': video_id,
1370 'fmt': ext,
1371 'name': track.attrib['name'].encode('utf-8'),
1372 })
1373 sub_formats.append({
1374 'url': 'https://www.youtube.com/api/timedtext?' + params,
1375 'ext': ext,
1376 })
1377 sub_lang_list[lang] = sub_formats
1378 if has_live_chat_replay:
1379 sub_lang_list['live_chat'] = [
1380 {
1381 'video_id': video_id,
1382 'ext': 'json',
1383 'protocol': 'youtube_live_chat_replay',
1384 },
1385 ]
1386 if not sub_lang_list:
1387 self._downloader.report_warning('video doesn\'t have subtitles')
1388 return {}
1389 return sub_lang_list
1390
1391 def _get_ytplayer_config(self, video_id, webpage):
1392 patterns = (
1393 # User data may contain arbitrary character sequences that may affect
1394 # JSON extraction with regex, e.g. when '};' is contained the second
1395 # regex won't capture the whole JSON. Yet working around by trying more
1396 # concrete regex first keeping in mind proper quoted string handling
1397 # to be implemented in future that will replace this workaround (see
1398 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1399 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1400 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1401 r';ytplayer\.config\s*=\s*({.+?});',
1402 r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
1403 )
1404 config = self._search_regex(
1405 patterns, webpage, 'ytplayer.config', default=None)
1406 if config:
1407 return self._parse_json(
1408 uppercase_escape(config), video_id, fatal=False)
1409
1410 def _get_music_metadata_from_yt_initial(self, yt_initial):
1411 music_metadata = []
1412 key_map = {
1413 'Album': 'album',
1414 'Artist': 'artist',
1415 'Song': 'track'
1416 }
1417 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1418 if type(contents) is list:
1419 for content in contents:
1420 music_track = {}
1421 if type(content) is not dict:
1422 continue
1423 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1424 if type(videoSecondaryInfoRenderer) is not dict:
1425 continue
1426 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1427 if type(rows) is not list:
1428 continue
1429 for row in rows:
1430 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1431 if type(metadataRowRenderer) is not dict:
1432 continue
1433 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1434 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1435 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1436 if type(key) is not str or type(value) is not str:
1437 continue
1438 if key in key_map:
1439 if key_map[key] in music_track:
1440 # we've started on a new track
1441 music_metadata.append(music_track)
1442 music_track = {}
1443 music_track[key_map[key]] = value
1444 if len(music_track.keys()):
1445 music_metadata.append(music_track)
1446 return music_metadata
1447
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Handles three generations of caption delivery: the legacy 'ttsurl'
        timedtext service, the 'caption_tracks' args variant, and the current
        player_response JSON. Returns {lang: [format dicts]} or {} with a
        warning on failure.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            # Oldest variant: the config exposes a timedtext service URL
            # ('ttsurl') that both lists and serves the caption tracks.
            if "args" in player_config and "ttsurl" in player_config["args"]:
                args = player_config['args']
                caption_url = args['ttsurl']
                timestamp = args['timestamp']

                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One URL per (target language, subtitle format), translating
                # from the original track's language.
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the 'tlang'
                # and 'fmt' query parameters of a base caption URL.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if "args" in player_config:
                player_response = player_config["args"].get('player_response')
            else:
                # New player system (ytInitialPlayerResponse) as of October 2020
                player_response = player_config

            if player_response:
                if isinstance(player_response, compat_str):
                    player_response = self._parse_json(
                        player_response, video_id, fatal=False)

                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                caption_tracks = renderer['captionTracks']
                for caption_track in caption_tracks:
                    if 'kind' not in caption_track:
                        # not an automatic transcription
                        continue
                    base_url = caption_track['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

                self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
                return {}

            if "args" in player_config:
                args = player_config["args"]

                # Some videos don't provide ttsurl but rather caption_tracks and
                # caption_translation_languages (e.g. 20LmZk1hakA)
                # Does not used anymore as of 22.06.2017
                caption_tracks = args['caption_tracks']
                caption_translation_languages = args['caption_translation_languages']
                caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
                sub_lang_list = []
                for lang in caption_translation_languages.split(','):
                    lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                    sub_lang = lang_qs.get('lc', [None])[0]
                    if sub_lang:
                        sub_lang_list.append(sub_lang)
                return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1567
1568 def _mark_watched(self, video_id, video_info, player_response):
1569 playback_url = url_or_none(try_get(
1570 player_response,
1571 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1572 video_info, lambda x: x['videostats_playback_base_url'][0]))
1573 if not playback_url:
1574 return
1575 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1576 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1577
1578 # cpn generation algorithm is reverse engineered from base.js.
1579 # In fact it works even with dummy cpn.
1580 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1581 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1582
1583 qs.update({
1584 'ver': ['2'],
1585 'cpn': [cpn],
1586 })
1587 playback_url = compat_urlparse.urlunparse(
1588 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1589
1590 self._download_webpage(
1591 playback_url, video_id, 'Marking watched',
1592 'Unable to mark watched', fatal=False)
1593
    @staticmethod
    def _extract_urls(webpage):
        """Return all embedded YouTube URLs/video ids found in webpage.

        Covers direct iframe/embed/object players, lazyYT placeholders and
        the Wordpress "YouTube Video Importer" plugin markup.
        """
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # Each match is a tuple of groups; the video id is the last group.
        entries.extend(m[-1] for m in matches)

        return entries
1625
1626 @staticmethod
1627 def _extract_url(webpage):
1628 urls = YoutubeIE._extract_urls(webpage)
1629 return urls[0] if urls else None
1630
1631 @classmethod
1632 def extract_id(cls, url):
1633 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1634 if mobj is None:
1635 raise ExtractorError('Invalid URL: %s' % url)
1636 video_id = mobj.group(2)
1637 return video_id
1638
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract chapter markers from the ytInitialData JSON on the watch page.

        Returns a list of {'start_time', 'end_time', 'title'} dicts (times in
        seconds), or None when the page carries no usable chapter data.
        """
        if not webpage:
            return
        # ytInitialData is assigned inline in a <script> tag; parse it
        # leniently (empty dict fallback, non-fatal).
        initial_data = self._parse_json(
            self._search_regex(
                r'window\["ytInitialData"\] = (.+);\n', webpage,
                'player args', default='{}'),
            video_id, fatal=False)
        if not initial_data or not isinstance(initial_data, dict):
            return
        # Walk the deeply nested renderer structure down to the chapter list;
        # try_get returns None if any intermediate key is missing.
        chapters_list = try_get(
            initial_data,
            lambda x: x['playerOverlays']
                ['playerOverlayRenderer']
                ['decoratedPlayerBarRenderer']
                ['decoratedPlayerBarRenderer']
                ['playerBar']
                ['chapteredPlayerBarRenderer']
                ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # timeRangeStartMillis is in milliseconds; scale to seconds.
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # A chapter ends where the next one starts; the final chapter
            # ends at the full video duration.
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1687
1688 @staticmethod
1689 def _extract_chapters_from_description(description, duration):
1690 if not description:
1691 return None
1692 chapter_lines = re.findall(
1693 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1694 description)
1695 if not chapter_lines:
1696 return None
1697 chapters = []
1698 for next_num, (chapter_line, time_point) in enumerate(
1699 chapter_lines, start=1):
1700 start_time = parse_duration(time_point)
1701 if start_time is None:
1702 continue
1703 if start_time > duration:
1704 break
1705 end_time = (duration if next_num == len(chapter_lines)
1706 else parse_duration(chapter_lines[next_num][1]))
1707 if end_time is None:
1708 continue
1709 if end_time > duration:
1710 end_time = duration
1711 if start_time > end_time:
1712 break
1713 chapter_title = re.sub(
1714 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1715 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1716 chapters.append({
1717 'start_time': start_time,
1718 'end_time': end_time,
1719 'title': chapter_title,
1720 })
1721 return chapters
1722
1723 def _extract_chapters(self, webpage, description, video_id, duration):
1724 return (self._extract_chapters_from_json(webpage, video_id, duration)
1725 or self._extract_chapters_from_description(description, duration))
1726
1727 def _real_extract(self, url):
1728 url, smuggled_data = unsmuggle_url(url, {})
1729
1730 proto = (
1731 'http' if self._downloader.params.get('prefer_insecure', False)
1732 else 'https')
1733
1734 start_time = None
1735 end_time = None
1736 parsed_url = compat_urllib_parse_urlparse(url)
1737 for component in [parsed_url.fragment, parsed_url.query]:
1738 query = compat_parse_qs(component)
1739 if start_time is None and 't' in query:
1740 start_time = parse_duration(query['t'][0])
1741 if start_time is None and 'start' in query:
1742 start_time = parse_duration(query['start'][0])
1743 if end_time is None and 'end' in query:
1744 end_time = parse_duration(query['end'][0])
1745
1746 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1747 mobj = re.search(self._NEXT_URL_RE, url)
1748 if mobj:
1749 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1750 video_id = self.extract_id(url)
1751
1752 # Get video webpage
1753 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1754 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1755
1756 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1757 video_id = qs.get('v', [None])[0] or video_id
1758
1759 # Attempt to extract SWF player URL
1760 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1761 if mobj is not None:
1762 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1763 else:
1764 player_url = None
1765
1766 dash_mpds = []
1767
        def add_dash_mpd(video_info):
            # Queue the DASH manifest URL advertised in a legacy video_info
            # dict (values are single-element lists, compat_parse_qs style),
            # de-duplicating against manifests collected so far.
            dash_mpd = video_info.get('dashmpd')
            if dash_mpd and dash_mpd[0] not in dash_mpds:
                dash_mpds.append(dash_mpd[0])
1772
        def add_dash_mpd_pr(pl_response):
            # Queue the DASH manifest URL found in a player_response dict,
            # de-duplicating against manifests collected so far.
            dash_mpd = url_or_none(try_get(
                pl_response, lambda x: x['streamingData']['dashManifestUrl'],
                compat_str))
            if dash_mpd and dash_mpd not in dash_mpds:
                dash_mpds.append(dash_mpd)
1779
1780 is_live = None
1781 view_count = None
1782
        def extract_view_count(v_info):
            # view_count arrives as a single-element list; returns an int
            # or None when absent/unparseable.
            return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1785
        def extract_player_response(player_response, video_id):
            # Parse a player_response JSON blob (string or string-able).
            # Returns a dict on success, otherwise None; any DASH manifest
            # URL inside it is registered as a side effect.
            pl_response = str_or_none(player_response)
            if not pl_response:
                return
            pl_response = self._parse_json(pl_response, video_id, fatal=False)
            if isinstance(pl_response, dict):
                add_dash_mpd_pr(pl_response)
                return pl_response
1794
        def extract_embedded_config(embed_webpage, video_id):
            # Pull the raw setConfig({...}) JSON string from the embed page;
            # returns None when the pattern is not found.
            embedded_config = self._search_regex(
                r'setConfig\(({.*})\);',
                embed_webpage, 'ytInitialData', default=None)
            if embedded_config:
                return embedded_config
1801
1802 player_response = {}
1803
1804 # Get video info
1805 video_info = {}
1806 embed_webpage = None
1807 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1808 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1809 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1810 age_gate = True
1811 # We simulate the access to the video from www.youtube.com/v/{video_id}
1812 # this can be viewed without login into Youtube
1813 url = proto + '://www.youtube.com/embed/%s' % video_id
1814 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1815 ext = extract_embedded_config(embed_webpage, video_id)
1816 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1817 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1818 if not playable_in_embed:
1819 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1820 playable_in_embed = ''
1821 else:
1822 playable_in_embed = playable_in_embed.group('playableinEmbed')
1823 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1824 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1825 if playable_in_embed == 'false':
1826 '''
1827 # TODO apply this patch when Support for Python 2.6(!) and above drops
1828 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1829 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1830 '''
1831 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1832 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1833 age_gate = False
1834 # Try looking directly into the video webpage
1835 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1836 if ytplayer_config:
1837 args = ytplayer_config.get("args")
1838 if args is not None:
1839 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1840 # Convert to the same format returned by compat_parse_qs
1841 video_info = dict((k, [v]) for k, v in args.items())
1842 add_dash_mpd(video_info)
1843 # Rental video is not rented but preview is available (e.g.
1844 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1845 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1846 if not video_info and args.get('ypc_vid'):
1847 return self.url_result(
1848 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1849 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1850 is_live = True
1851 if not player_response:
1852 player_response = extract_player_response(args.get('player_response'), video_id)
1853 elif not player_response:
1854 player_response = ytplayer_config
1855 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1856 add_dash_mpd_pr(player_response)
1857 else:
1858 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1859 else:
1860 data = compat_urllib_parse_urlencode({
1861 'video_id': video_id,
1862 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1863 'sts': self._search_regex(
1864 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1865 })
1866 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1867 try:
1868 video_info_webpage = self._download_webpage(
1869 video_info_url, video_id,
1870 note='Refetching age-gated info webpage',
1871 errnote='unable to download video info webpage')
1872 except ExtractorError:
1873 video_info_webpage = None
1874 if video_info_webpage:
1875 video_info = compat_parse_qs(video_info_webpage)
1876 pl_response = video_info.get('player_response', [None])[0]
1877 player_response = extract_player_response(pl_response, video_id)
1878 add_dash_mpd(video_info)
1879 view_count = extract_view_count(video_info)
1880 else:
1881 age_gate = False
1882 # Try looking directly into the video webpage
1883 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1884 args = ytplayer_config.get("args")
1885 if args is not None:
1886 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1887 # Convert to the same format returned by compat_parse_qs
1888 video_info = dict((k, [v]) for k, v in args.items())
1889 add_dash_mpd(video_info)
1890 # Rental video is not rented but preview is available (e.g.
1891 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1892 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1893 if not video_info and args.get('ypc_vid'):
1894 return self.url_result(
1895 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1896 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1897 is_live = True
1898 if not player_response:
1899 player_response = extract_player_response(args.get('player_response'), video_id)
1900 elif not player_response:
1901 player_response = ytplayer_config
1902 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1903 add_dash_mpd_pr(player_response)
1904
        def extract_unavailable_message():
            # Collect the "unavailable-message"/"unavailable-submessage"
            # texts from the watch page; returns them joined by newlines,
            # or None when neither is present.
            messages = []
            for tag, kind in (('h1', 'message'), ('div', 'submessage')):
                msg = self._html_search_regex(
                    r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
                    video_webpage, 'unavailable %s' % kind, default=None)
                if msg:
                    messages.append(msg)
            if messages:
                return '\n'.join(messages)
1915
1916 if not video_info and not player_response:
1917 unavailable_message = extract_unavailable_message()
1918 if not unavailable_message:
1919 unavailable_message = 'Unable to extract video data'
1920 raise ExtractorError(
1921 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1922
1923 if not isinstance(video_info, dict):
1924 video_info = {}
1925
1926 video_details = try_get(
1927 player_response, lambda x: x['videoDetails'], dict) or {}
1928
1929 microformat = try_get(
1930 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1931
1932 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1933 if not video_title:
1934 self._downloader.report_warning('Unable to extract video title')
1935 video_title = '_'
1936
1937 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1938 if video_description:
1939
            def replace_url(m):
                # Resolve the link target against the page URL; when it is a
                # YouTube /redirect wrapper, substitute the real destination
                # from its q= query parameter.
                redir_url = compat_urlparse.urljoin(url, m.group(1))
                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
                    qs = compat_parse_qs(parsed_redir_url.query)
                    q = qs.get('q')
                    if q and q[0]:
                        return q[0]
                return redir_url
1949
1950 description_original = video_description = re.sub(r'''(?x)
1951 <a\s+
1952 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1953 (?:title|href)="([^"]+)"\s+
1954 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1955 class="[^"]*"[^>]*>
1956 [^<]+\.{3}\s*
1957 </a>
1958 ''', replace_url, video_description)
1959 video_description = clean_html(video_description)
1960 else:
1961 video_description = video_details.get('shortDescription')
1962 if video_description is None:
1963 video_description = self._html_search_meta('description', video_webpage)
1964
1965 if not smuggled_data.get('force_singlefeed', False):
1966 if not self._downloader.params.get('noplaylist'):
1967 multifeed_metadata_list = try_get(
1968 player_response,
1969 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1970 compat_str) or try_get(
1971 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1972 if multifeed_metadata_list:
1973 entries = []
1974 feed_ids = []
1975 for feed in multifeed_metadata_list.split(','):
1976 # Unquote should take place before split on comma (,) since textual
1977 # fields may contain comma as well (see
1978 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1979 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1980
1981 def feed_entry(name):
1982 return try_get(feed_data, lambda x: x[name][0], compat_str)
1983
1984 feed_id = feed_entry('id')
1985 if not feed_id:
1986 continue
1987 feed_title = feed_entry('title')
1988 title = video_title
1989 if feed_title:
1990 title += ' (%s)' % feed_title
1991 entries.append({
1992 '_type': 'url_transparent',
1993 'ie_key': 'Youtube',
1994 'url': smuggle_url(
1995 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1996 {'force_singlefeed': True}),
1997 'title': title,
1998 })
1999 feed_ids.append(feed_id)
2000 self.to_screen(
2001 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2002 % (', '.join(feed_ids), video_id))
2003 return self.playlist_result(entries, video_id, video_title, video_description)
2004 else:
2005 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2006
2007 if view_count is None:
2008 view_count = extract_view_count(video_info)
2009 if view_count is None and video_details:
2010 view_count = int_or_none(video_details.get('viewCount'))
2011 if view_count is None and microformat:
2012 view_count = int_or_none(microformat.get('viewCount'))
2013
2014 if is_live is None:
2015 is_live = bool_or_none(video_details.get('isLive'))
2016
2017 has_live_chat_replay = False
2018 if not is_live:
2019 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2020 try:
2021 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2022 has_live_chat_replay = True
2023 except (KeyError, IndexError, TypeError):
2024 pass
2025
2026 # Check for "rental" videos
2027 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2028 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2029
        def _extract_filesize(media_url):
            # The clen parameter (query "clen=" or path "/clen/" style)
            # carries the content length in bytes; None when absent.
            return int_or_none(self._search_regex(
                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2033
2034 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2035 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2036
2037 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2038 self.report_rtmp_download()
2039 formats = [{
2040 'format_id': '_rtmp',
2041 'protocol': 'rtmp',
2042 'url': video_info['conn'][0],
2043 'player_url': player_url,
2044 }]
2045 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2046 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2047 if 'rtmpe%3Dyes' in encoded_url_map:
2048 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2049 formats = []
2050 formats_spec = {}
2051 fmt_list = video_info.get('fmt_list', [''])[0]
2052 if fmt_list:
2053 for fmt in fmt_list.split(','):
2054 spec = fmt.split('/')
2055 if len(spec) > 1:
2056 width_height = spec[1].split('x')
2057 if len(width_height) == 2:
2058 formats_spec[spec[0]] = {
2059 'resolution': spec[1],
2060 'width': int_or_none(width_height[0]),
2061 'height': int_or_none(width_height[1]),
2062 }
2063 for fmt in streaming_formats:
2064 itag = str_or_none(fmt.get('itag'))
2065 if not itag:
2066 continue
2067 quality = fmt.get('quality')
2068 quality_label = fmt.get('qualityLabel') or quality
2069 formats_spec[itag] = {
2070 'asr': int_or_none(fmt.get('audioSampleRate')),
2071 'filesize': int_or_none(fmt.get('contentLength')),
2072 'format_note': quality_label,
2073 'fps': int_or_none(fmt.get('fps')),
2074 'height': int_or_none(fmt.get('height')),
2075 # bitrate for itag 43 is always 2147483647
2076 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2077 'width': int_or_none(fmt.get('width')),
2078 }
2079
2080 for fmt in streaming_formats:
2081 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2082 continue
2083 url = url_or_none(fmt.get('url'))
2084
2085 if not url:
2086 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2087 if not cipher:
2088 continue
2089 url_data = compat_parse_qs(cipher)
2090 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2091 if not url:
2092 continue
2093 else:
2094 cipher = None
2095 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2096
2097 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2098 # Unsupported FORMAT_STREAM_TYPE_OTF
2099 if stream_type == 3:
2100 continue
2101
2102 format_id = fmt.get('itag') or url_data['itag'][0]
2103 if not format_id:
2104 continue
2105 format_id = compat_str(format_id)
2106
2107 if cipher:
2108 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2109 ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
2110 jsplayer_url_json = self._search_regex(
2111 ASSETS_RE,
2112 embed_webpage if age_gate else video_webpage,
2113 'JS player URL (1)', default=None)
2114 if not jsplayer_url_json and not age_gate:
2115 # We need the embed website after all
2116 if embed_webpage is None:
2117 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2118 embed_webpage = self._download_webpage(
2119 embed_url, video_id, 'Downloading embed webpage')
2120 jsplayer_url_json = self._search_regex(
2121 ASSETS_RE, embed_webpage, 'JS player URL')
2122
2123 player_url = json.loads(jsplayer_url_json)
2124 if player_url is None:
2125 player_url_json = self._search_regex(
2126 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2127 video_webpage, 'age gate player URL')
2128 player_url = json.loads(player_url_json)
2129
2130 if 'sig' in url_data:
2131 url += '&signature=' + url_data['sig'][0]
2132 elif 's' in url_data:
2133 encrypted_sig = url_data['s'][0]
2134
2135 if self._downloader.params.get('verbose'):
2136 if player_url is None:
2137 player_desc = 'unknown'
2138 else:
2139 player_type, player_version = self._extract_player_info(player_url)
2140 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2141 parts_sizes = self._signature_cache_id(encrypted_sig)
2142 self.to_screen('{%s} signature length %s, %s' %
2143 (format_id, parts_sizes, player_desc))
2144
2145 signature = self._decrypt_signature(
2146 encrypted_sig, video_id, player_url, age_gate)
2147 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2148 url += '&%s=%s' % (sp, signature)
2149 if 'ratebypass' not in url:
2150 url += '&ratebypass=yes'
2151
2152 dct = {
2153 'format_id': format_id,
2154 'url': url,
2155 'player_url': player_url,
2156 }
2157 if format_id in self._formats:
2158 dct.update(self._formats[format_id])
2159 if format_id in formats_spec:
2160 dct.update(formats_spec[format_id])
2161
2162 # Some itags are not included in DASH manifest thus corresponding formats will
2163 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2164 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2165 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2166 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2167
2168 if width is None:
2169 width = int_or_none(fmt.get('width'))
2170 if height is None:
2171 height = int_or_none(fmt.get('height'))
2172
2173 filesize = int_or_none(url_data.get(
2174 'clen', [None])[0]) or _extract_filesize(url)
2175
2176 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2177 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2178
2179 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2180 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2181 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2182
2183 more_fields = {
2184 'filesize': filesize,
2185 'tbr': tbr,
2186 'width': width,
2187 'height': height,
2188 'fps': fps,
2189 'format_note': quality_label or quality,
2190 }
2191 for key, value in more_fields.items():
2192 if value:
2193 dct[key] = value
2194 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2195 if type_:
2196 type_split = type_.split(';')
2197 kind_ext = type_split[0].split('/')
2198 if len(kind_ext) == 2:
2199 kind, _ = kind_ext
2200 dct['ext'] = mimetype2ext(type_split[0])
2201 if kind in ('audio', 'video'):
2202 codecs = None
2203 for mobj in re.finditer(
2204 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2205 if mobj.group('key') == 'codecs':
2206 codecs = mobj.group('val')
2207 break
2208 if codecs:
2209 dct.update(parse_codecs(codecs))
2210 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2211 dct['downloader_options'] = {
2212 # Youtube throttles chunks >~10M
2213 'http_chunk_size': 10485760,
2214 }
2215 formats.append(dct)
2216 else:
2217 manifest_url = (
2218 url_or_none(try_get(
2219 player_response,
2220 lambda x: x['streamingData']['hlsManifestUrl'],
2221 compat_str))
2222 or url_or_none(try_get(
2223 video_info, lambda x: x['hlsvp'][0], compat_str)))
2224 if manifest_url:
2225 formats = []
2226 m3u8_formats = self._extract_m3u8_formats(
2227 manifest_url, video_id, 'mp4', fatal=False)
2228 for a_format in m3u8_formats:
2229 itag = self._search_regex(
2230 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2231 if itag:
2232 a_format['format_id'] = itag
2233 if itag in self._formats:
2234 dct = self._formats[itag].copy()
2235 dct.update(a_format)
2236 a_format = dct
2237 a_format['player_url'] = player_url
2238 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2239 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2240 if self._downloader.params.get('youtube_include_hls_manifest', True):
2241 formats.append(a_format)
2242 else:
2243 error_message = extract_unavailable_message()
2244 if not error_message:
2245 error_message = clean_html(try_get(
2246 player_response, lambda x: x['playabilityStatus']['reason'],
2247 compat_str))
2248 if not error_message:
2249 error_message = clean_html(
2250 try_get(video_info, lambda x: x['reason'][0], compat_str))
2251 if error_message:
2252 raise ExtractorError(error_message, expected=True)
2253 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2254
2255 # uploader
2256 video_uploader = try_get(
2257 video_info, lambda x: x['author'][0],
2258 compat_str) or str_or_none(video_details.get('author'))
2259 if video_uploader:
2260 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2261 else:
2262 self._downloader.report_warning('unable to extract uploader name')
2263
2264 # uploader_id
2265 video_uploader_id = None
2266 video_uploader_url = None
2267 mobj = re.search(
2268 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2269 video_webpage)
2270 if mobj is not None:
2271 video_uploader_id = mobj.group('uploader_id')
2272 video_uploader_url = mobj.group('uploader_url')
2273 else:
2274 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2275 if owner_profile_url:
2276 video_uploader_id = self._search_regex(
2277 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2278 default=None)
2279 video_uploader_url = owner_profile_url
2280
2281 channel_id = (
2282 str_or_none(video_details.get('channelId'))
2283 or self._html_search_meta(
2284 'channelId', video_webpage, 'channel id', default=None)
2285 or self._search_regex(
2286 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2287 video_webpage, 'channel id', default=None, group='id'))
2288 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2289
2290 thumbnails = []
2291 thumbnails_list = try_get(
2292 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2293 for t in thumbnails_list:
2294 if not isinstance(t, dict):
2295 continue
2296 thumbnail_url = url_or_none(t.get('url'))
2297 if not thumbnail_url:
2298 continue
2299 thumbnails.append({
2300 'url': thumbnail_url,
2301 'width': int_or_none(t.get('width')),
2302 'height': int_or_none(t.get('height')),
2303 })
2304
2305 if not thumbnails:
2306 video_thumbnail = None
2307 # We try first to get a high quality image:
2308 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2309 video_webpage, re.DOTALL)
2310 if m_thumb is not None:
2311 video_thumbnail = m_thumb.group(1)
2312 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2313 if thumbnail_url:
2314 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2315 if video_thumbnail:
2316 thumbnails.append({'url': video_thumbnail})
2317
2318 # upload date
2319 upload_date = self._html_search_meta(
2320 'datePublished', video_webpage, 'upload date', default=None)
2321 if not upload_date:
2322 upload_date = self._search_regex(
2323 [r'(?s)id="eow-date.*?>(.*?)</span>',
2324 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2325 video_webpage, 'upload date', default=None)
2326 if not upload_date:
2327 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2328 upload_date = unified_strdate(upload_date)
2329
2330 video_license = self._html_search_regex(
2331 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2332 video_webpage, 'license', default=None)
2333
2334 m_music = re.search(
2335 r'''(?x)
2336 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2337 <ul[^>]*>\s*
2338 <li>(?P<title>.+?)
2339 by (?P<creator>.+?)
2340 (?:
2341 \(.+?\)|
2342 <a[^>]*
2343 (?:
2344 \bhref=["\']/red[^>]*>| # drop possible
2345 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2346 )
2347 .*?
2348 )?</li
2349 ''',
2350 video_webpage)
2351 if m_music:
2352 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2353 video_creator = clean_html(m_music.group('creator'))
2354 else:
2355 video_alt_title = video_creator = None
2356
        def extract_meta(field):
            # Read a watch-page metadata row (e.g. Song/Artist/Album) by its
            # <h4> title; returns None when the row is missing.
            return self._html_search_regex(
                r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
                video_webpage, field, default=None)
2361
2362 track = extract_meta('Song')
2363 artist = extract_meta('Artist')
2364 album = extract_meta('Album')
2365
2366 # Youtube Music Auto-generated description
2367 release_date = release_year = None
2368 if video_description:
2369 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2370 if mobj:
2371 if not track:
2372 track = mobj.group('track').strip()
2373 if not artist:
2374 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2375 if not album:
2376 album = mobj.group('album'.strip())
2377 release_year = mobj.group('release_year')
2378 release_date = mobj.group('release_date')
2379 if release_date:
2380 release_date = release_date.replace('-', '')
2381 if not release_year:
2382 release_year = int(release_date[:4])
2383 if release_year:
2384 release_year = int(release_year)
2385
2386 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2387 if yt_initial:
2388 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2389 if len(music_metadata):
2390 album = music_metadata[0].get('album')
2391 artist = music_metadata[0].get('artist')
2392 track = music_metadata[0].get('track')
2393
2394 m_episode = re.search(
2395 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2396 video_webpage)
2397 if m_episode:
2398 series = unescapeHTML(m_episode.group('series'))
2399 season_number = int(m_episode.group('season'))
2400 episode_number = int(m_episode.group('episode'))
2401 else:
2402 series = season_number = episode_number = None
2403
2404 m_cat_container = self._search_regex(
2405 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2406 video_webpage, 'categories', default=None)
2407 category = None
2408 if m_cat_container:
2409 category = self._html_search_regex(
2410 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2411 default=None)
2412 if not category:
2413 category = try_get(
2414 microformat, lambda x: x['category'], compat_str)
2415 video_categories = None if category is None else [category]
2416
2417 video_tags = [
2418 unescapeHTML(m.group('content'))
2419 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2420 if not video_tags:
2421 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2422
        def _extract_count(count_name):
            # Read like/dislike totals from the accessibility label JSON on
            # the watch page (e.g. "1,234 likes"); None when not present.
            return str_to_int(self._search_regex(
                r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
                % re.escape(count_name),
                video_webpage, count_name, default=None))
2428
2429 like_count = _extract_count('like')
2430 dislike_count = _extract_count('dislike')
2431
2432 if view_count is None:
2433 view_count = str_to_int(self._search_regex(
2434 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2435 'view count', default=None))
2436
2437 average_rating = (
2438 float_or_none(video_details.get('averageRating'))
2439 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2440
2441 # subtitles
2442 video_subtitles = self.extract_subtitles(
2443 video_id, video_webpage, has_live_chat_replay)
2444 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2445
2446 video_duration = try_get(
2447 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2448 if not video_duration:
2449 video_duration = int_or_none(video_details.get('lengthSeconds'))
2450 if not video_duration:
2451 video_duration = parse_duration(self._html_search_meta(
2452 'duration', video_webpage, 'video duration'))
2453
2454 # Get Subscriber Count of channel
2455 subscriber_count = parse_count(self._search_regex(
2456 r'"text":"([\d\.]+\w?) subscribers"',
2457 video_webpage,
2458 'subscriber count',
2459 default=None
2460 ))
2461
2462 # annotations
2463 video_annotations = None
2464 if self._downloader.params.get('writeannotations', False):
2465 xsrf_token = self._search_regex(
2466 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2467 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2468 invideo_url = try_get(
2469 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2470 if xsrf_token and invideo_url:
2471 xsrf_field_name = self._search_regex(
2472 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2473 video_webpage, 'xsrf field name',
2474 group='xsrf_field_name', default='session_token')
2475 video_annotations = self._download_webpage(
2476 self._proto_relative_url(invideo_url),
2477 video_id, note='Downloading annotations',
2478 errnote='Unable to download video annotations', fatal=False,
2479 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2480
2481 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2482
2483 # Look for the DASH manifest
2484 if self._downloader.params.get('youtube_include_dash_manifest', True):
2485 dash_mpd_fatal = True
2486 for mpd_url in dash_mpds:
2487 dash_formats = {}
2488 try:
2489 def decrypt_sig(mobj):
2490 s = mobj.group(1)
2491 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2492 return '/signature/%s' % dec_s
2493
2494 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2495
2496 for df in self._extract_mpd_formats(
2497 mpd_url, video_id, fatal=dash_mpd_fatal,
2498 formats_dict=self._formats):
2499 if not df.get('filesize'):
2500 df['filesize'] = _extract_filesize(df['url'])
2501 # Do not overwrite DASH format found in some previous DASH manifest
2502 if df['format_id'] not in dash_formats:
2503 dash_formats[df['format_id']] = df
2504 # Additional DASH manifests may end up in HTTP Error 403 therefore
2505 # allow them to fail without bug report message if we already have
2506 # some DASH manifest succeeded. This is temporary workaround to reduce
2507 # burst of bug reports until we figure out the reason and whether it
2508 # can be fixed at all.
2509 dash_mpd_fatal = False
2510 except (ExtractorError, KeyError) as e:
2511 self.report_warning(
2512 'Skipping DASH manifest: %r' % e, video_id)
2513 if dash_formats:
2514 # Remove the formats we found through non-DASH, they
2515 # contain less info and it can be wrong, because we use
2516 # fixed values (for example the resolution). See
2517 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2518 # example.
2519 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2520 formats.extend(dash_formats.values())
2521
2522 # Check for malformed aspect ratio
2523 stretched_m = re.search(
2524 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2525 video_webpage)
2526 if stretched_m:
2527 w = float(stretched_m.group('w'))
2528 h = float(stretched_m.group('h'))
2529 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2530 # We will only process correct ratios.
2531 if w > 0 and h > 0:
2532 ratio = w / h
2533 for f in formats:
2534 if f.get('vcodec') != 'none':
2535 f['stretched_ratio'] = ratio
2536
2537 if not formats:
2538 if 'reason' in video_info:
2539 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2540 regions_allowed = self._html_search_meta(
2541 'regionsAllowed', video_webpage, default=None)
2542 countries = regions_allowed.split(',') if regions_allowed else None
2543 self.raise_geo_restricted(
2544 msg=video_info['reason'][0], countries=countries)
2545 reason = video_info['reason'][0]
2546 if 'Invalid parameters' in reason:
2547 unavailable_message = extract_unavailable_message()
2548 if unavailable_message:
2549 reason = unavailable_message
2550 raise ExtractorError(
2551 'YouTube said: %s' % reason,
2552 expected=True, video_id=video_id)
2553 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2554 raise ExtractorError('This video is DRM protected.', expected=True)
2555
2556 self._sort_formats(formats)
2557
2558 self.mark_watched(video_id, video_info, player_response)
2559
2560 return {
2561 'id': video_id,
2562 'uploader': video_uploader,
2563 'uploader_id': video_uploader_id,
2564 'uploader_url': video_uploader_url,
2565 'channel_id': channel_id,
2566 'channel_url': channel_url,
2567 'upload_date': upload_date,
2568 'license': video_license,
2569 'creator': video_creator or artist,
2570 'title': video_title,
2571 'alt_title': video_alt_title or track,
2572 'thumbnails': thumbnails,
2573 'description': video_description,
2574 'categories': video_categories,
2575 'tags': video_tags,
2576 'subtitles': video_subtitles,
2577 'automatic_captions': automatic_captions,
2578 'duration': video_duration,
2579 'age_limit': 18 if age_gate else 0,
2580 'annotations': video_annotations,
2581 'chapters': chapters,
2582 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2583 'view_count': view_count,
2584 'like_count': like_count,
2585 'dislike_count': dislike_count,
2586 'average_rating': average_rating,
2587 'formats': formats,
2588 'is_live': is_live,
2589 'start_time': start_time,
2590 'end_time': end_time,
2591 'series': series,
2592 'season_number': season_number,
2593 'episode_number': episode_number,
2594 'track': track,
2595 'artist': artist,
2596 'album': album,
2597 'release_date': release_date,
2598 'release_year': release_year,
2599 'subscriber_count': subscriber_count,
2600 }
2601
2602
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    # Youtube Music album playlists share this id prefix; they are handled as
    # regular playlists (see _real_extract) but get the fixed channel info below.
    _YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
    _YTM_CHANNEL_INFO = {
        'uploader': 'Youtube Music',
        'uploader_id': 'music',  # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
        'uploader_url': 'https://www.youtube.com/music'
    }
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        # Log in once per run when credentials are configured (see base class).
        self._login()

    def extract_videos_from_page(self, page):
        """Return an iterator of (video_id, title) pairs scraped from *page*.

        First collects elements carrying a ``data-video-id`` attribute, then
        runs progressively more relaxed regex fallbacks for older/alternate
        page layouts.  Duplicate handling is delegated to
        extract_videos_from_page_impl (defined in the base class).
        """
        ids_in_page = []
        titles_in_page = []

        for item in re.findall(
                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
            attrs = extract_attributes(item)
            video_id = attrs['data-video-id']
            # data-title may be absent; unescapeHTML(None) stays None.
            video_title = unescapeHTML(attrs.get('data-title'))
            if video_title:
                video_title = video_title.strip()
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)

        # Fallback with old _VIDEO_RE
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)

        # Relaxed fallbacks
        self.extract_videos_from_page_impl(
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)
        self.extract_videos_from_page_impl(
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix_ids_from_yt_initial(self, yt_initial):
        """Collect video ids from the embedded ytInitialData JSON of a mix page."""
        ids = []
        playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
        if playlist_contents:
            for item in playlist_contents:
                videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
                if videoId:
                    ids.append(videoId)
        return ids

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix playlist by repeatedly loading watch
        pages until no new video ids turn up."""
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        yt_initial = None
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))

            # if no ids in html of page, try using embedded json
            if (len(new_ids) == 0):
                yt_initial = self._get_yt_initial_data(playlist_id, webpage)
                if yt_initial:
                    new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)

            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # Try several class names used for the playlist title over time.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title')
            or search_title('title long-title')
            or search_title('title'))
        title = clean_html(title_span)

        if not title:
            # Fall back to the title embedded in ytInitialData, if we parsed it.
            title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist page.

        Returns a (has_videos, playlist_result) tuple; has_videos is False
        when the page does not actually serve any playlist entries.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })
        if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
            # Youtube Music albums are always published by the same channel.
            playlist.update(self._YTM_CHANNEL_INFO)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """If *url* also identifies a single video, decide whether to download
        just that video (--no-playlist).

        Returns (video_id, result): result is a url_result when only the
        video should be downloaded, otherwise None; video_id is None when the
        URL carries no video id at all.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
                return video_id, None
        return None, None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
                # Mixes require a custom extraction process,
                # Youtube Music playlists act like normal playlists (with randomized order)
                return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
2995
2996
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific playlists/live extractors first.
        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                else super(YoutubeChannelIE, cls).suitable(url))

    def _build_template_url(self, url, channel_id):
        # Overridden by YoutubeUserIE, which needs the original URL to decide
        # between /user/ and /c/ pages.
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's uploads.

        Prefers redirecting to the channel's "UU..." uploads playlist (page
        listing is capped, see comment below); falls back to scraping the
        channel pages directly.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Secondary source: app-link meta tags carry a vnd.youtube:// URL.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # The uploads playlist id is the channel id with 'UC' -> 'UU'.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # Empty channel: surface any alert message YouTube shows instead.
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3096
3097
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        _VALID_URL is very permissive, so defer to every other Youtube*IE
        class defined in this module first and only claim the URL when none
        of them matches.
        """
        # A generator expression is already an iterator; no iter() needed.
        other_yt_ies = (
            klass for (name, klass) in globals().items()
            if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the videos-tab URL, preserving the /user/ vs /c/ page type
        from the original URL (plain ytuser: input defaults to /user/)."""
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
3155
3156
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a channel's /live URL to its currently embedded video, or
        fall back to the bare channel/user URL when none can be found."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only treat the page as a video when og:type says so and the
            # embedded videoId looks like a well-formed 11-character id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        # No live video found: let the generic channel handling take over.
        return self.url_result(base_url)
3207
3208
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    # No extraction code here: the /playlists tab is scraped entirely by the
    # YoutubePlaylistsBaseInfoExtractor base class.
    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3241
3242
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches watch links in search-result HTML; the title group is optional
    # in the pattern, so entries without a title attribute still match.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3245
3246
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional pre-encoded filter string sent as 'params' in the API request
    # (see YoutubeSearchDateIE); None means no extra filtering.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* search results for *query* as url_transparent dicts.

        Pages through the youtubei/v1/search JSON API, following the
        continuation token returned with each page until *n* results have
        been yielded or no further pages are available.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first response and continuation responses nest the result
            # list differently; try both paths.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Only items carrying a videoRenderer are videos; skip the rest.
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Strip whitespace (thousands separators) before reading digits.
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                # Stop mid-page once the requested count is reached (never
                # triggers for n == float('inf')).
                if total == n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3335
3336
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search machinery as the parent class, restricted to newest-first
    # ordering via the pre-encoded _SEARCH_PARAMS filter.
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
3342
3343
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    # Matches the JSON blob holding the initial search results embedded in the page
    _SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _find_videos_in_json(self, extracted):
        """Recursively walk the deserialized ytInitialData structure and
        collect every dict that carries a 'videoId' key.

        Returns the list of matching (video renderer) dicts.
        """
        videos = []

        def _real_find(obj):
            # compat_str (not str) so that on py2 the unicode strings
            # produced by the JSON parser are recognized as leaves too
            if obj is None or isinstance(obj, compat_str):
                return

            if isinstance(obj, list):
                for elem in obj:
                    _real_find(elem)

            if isinstance(obj, dict):
                if 'videoId' in obj:
                    videos.append(obj)
                    return

                # keys are irrelevant here, only recurse into the values
                for o in obj.values():
                    _real_find(o)

        _real_find(extracted)

        return videos

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Parse search results out of *page* and append new (id, title)
        pairs to the two parallel lists in place; if an id was already seen
        without a title, backfill its title."""
        search_response = self._parse_json(
            self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)

        result_items = self._find_videos_in_json(search_response)

        for renderer in result_items:
            video_id = try_get(renderer, lambda x: x['videoId'])
            video_title = try_get(renderer, lambda x: x['title']['runs'][0]['text']) or try_get(renderer, lambda x: x['title']['simpleText'])

            if video_id is None or video_title is None:
                # we do not have a videoRenderer or title extraction broke
                continue

            video_title = video_title.strip()

            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return an iterable of (video_id, title) pairs found on *page*."""
        ids_in_page = []
        titles_in_page = []
        self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
3417
3418
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just a collection of playlists: delegate to the playlists
        # base extractor on the show's /playlists sub-page.
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3436
3437
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Matches the JSON blob holding the initial feed data embedded in the page
    _FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    # Matches the ytcfg.set({...}) call with the client config; the dot must
    # be escaped so it only matches a literal '.' (it previously matched any
    # character)
    _YTCFG_DATA = r"ytcfg\.set\(({.*?})\)"

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _find_videos_in_json(self, extracted):
        """Recursively collect video dicts and the first continuation found.

        Returns a (videos, continuation) tuple where *videos* is a list of
        dicts containing a 'videoId' key and *continuation* is the
        'nextContinuationData' dict, or None when absent.
        """
        videos = []
        c = {}

        def _real_find(obj):
            # compat_str (not str) so that on py2 the unicode strings
            # produced by the JSON parser are recognized as leaves too
            if obj is None or isinstance(obj, compat_str):
                return

            if isinstance(obj, list):
                for elem in obj:
                    _real_find(elem)

            if isinstance(obj, dict):
                if "videoId" in obj:
                    videos.append(obj)
                    return

                if "nextContinuationData" in obj:
                    c["continuation"] = obj["nextContinuationData"]
                    return

                # keys are irrelevant here, only recurse into the values
                for o in obj.values():
                    _real_find(o)

        _real_find(extracted)

        return videos, try_get(c, lambda x: x["continuation"])

    def _entries(self, page):
        """Yield url_result entries for the feed, following continuations
        until the feed is exhausted or no continuation token is available."""
        # ids already yielded -- set membership is O(1) instead of rescanning
        # a list of renderer dicts for every candidate video
        seen_ids = set()

        yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)

        search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)

        for page_num in itertools.count(1):
            video_info, continuation = self._find_videos_in_json(search_response)

            new_info = []

            for v in video_info:
                v_id = try_get(v, lambda x: x['videoId'])
                if not v_id or v_id in seen_ids:
                    continue
                seen_ids.add(v_id)
                new_info.append(v)

            # no unseen videos on this page: the feed is exhausted
            if not new_info:
                break

            for video in new_info:
                yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=try_get(video, lambda x: x['title']['runs'][0]['text']) or try_get(video, lambda x: x['title']['simpleText']))

            # continuation requests need both a token and the client config
            if not continuation or not yt_conf:
                break

            search_response = self._download_json(
                'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                query={
                    "ctoken": try_get(continuation, lambda x: x["continuation"]),
                    "continuation": try_get(continuation, lambda x: x["continuation"]),
                    "itct": try_get(continuation, lambda x: x["clickTrackingParams"])
                },
                headers={
                    "X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
                    "X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
                    "X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
                    "X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
                    "X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
                    "X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
                    "X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
                })

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3544
3545
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL inside the WL list may resolve to a single video;
        # otherwise extract the whole watch-later playlist.
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        return self._extract_playlist('WL')[1]
3565
3566
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a regular playlist; pull its id out of
        # the page markup and hand it over to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
3577
3578
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'  # used by the base class to build the feed URL and IE_NAME
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3584
3585
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'  # used by the base class to build the feed URL and IE_NAME
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3591
3592
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'  # used by the base class to build the feed URL and IE_NAME
    _PLAYLIST_TITLE = 'Youtube History'
3598
3599
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catch-all for watch URLs whose v= parameter is missing entirely,
    # typically because an unquoted '&' was interpreted by the user's shell.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise with a hint to quote the URL; there is no video id
        left to extract from a URL matched by this extractor."""
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3647
3648
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The URL pattern only matches ids of 1-10 characters, i.e. shorter
        # than a full video id -- the URL was most likely cut off, so fail
        # loudly instead of attempting a doomed download.
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)