]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/youtube.py
Merge pull request #19 from nixxo/patch-1
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 mimetype2ext,
40 orderedSet,
41 parse_codecs,
42 parse_count,
43 parse_duration,
44 remove_quotes,
45 remove_start,
46 smuggle_url,
47 str_or_none,
48 str_to_int,
49 try_get,
50 unescapeHTML,
51 unified_strdate,
52 unsmuggle_url,
53 uppercase_escape,
54 url_or_none,
55 urlencode_postdata,
56 )
57
58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account sign-in entry points
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # JSON endpoints of the "GlifWebSignIn" login flow used by _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the 'TL' token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist-ID prefixes followed by at least 10 ID characters
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    # Headers identifying the classic web client, sent with JSON page requests
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Force English UI text by setting the PREF cookie on .youtube.com."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login-form fields plus the protocol payload
            # `f_req` to a sign-in endpoint and return the parsed JSON.
            # transform_source strips the anti-XSSI junk before the first '['.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account to obtain an opaque user hash.
        # The payload mirrors what the web sign-in page sends; positions matter.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password challenge for the looked-up account
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # NOTE(review): `%` binds tighter than the conditional, so the
            # 'Unable to login: ' prefix is only shown for the
            # INCORRECT_ANSWER_ENTERED case; other messages print bare.
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                # Step 3 (optional): submit the TOTP code
                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # NOTE(review): same precedence quirk as the login warning
                    # above — the prefix only applies to the first case.
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved automatically; tell the
                # user to resolve them in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Final step: fetch the CheckCookie URL so the session cookies stick
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Request the old (non-polymer) YouTube layout on every page fetch
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        # Runs once before extraction: force English UI, then try to log in
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
288
289
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Paginates old-style YouTube pages that use a "Load more" button
    def _entries(self, page, playlist_id):
        """Yield entries from *page* and from every continuation reached via
        the "Load more" button, retrying transient 5xx responses."""
        widget_html = content_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(content_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                break

            max_retries = 3
            attempt = 0
            while True:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://www.youtube.com/%s' % load_more.group('more'), playlist_id,
                        'Downloading page #%s%s'
                        % (page_num, ' (retry #%d)' % attempt if attempt else ''),
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                except ExtractorError as e:
                    is_server_error = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if is_server_error:
                        attempt += 1
                        if attempt <= max_retries:
                            continue
                    raise
                break

            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            widget_html = more['load_more_widget_html']
328
329
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        # Wrap each (id, title) pair found on the page into a url_result
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Scan *page* with *video_re*, appending new video IDs and titles to
        the caller-supplied accumulator lists (deduplicated by video ID)."""
        for match in re.finditer(video_re, page):
            groups = match.groupdict()
            # The link with index 0 is not the first video of the playlist
            # (not sure if still actual).
            # NOTE(review): the guard checks for an 'index' group but compares
            # the 'id' group; with 11-char video IDs it never matches —
            # possibly meant match.group('index'). Behavior preserved as-is.
            if 'index' in groups and match.group('id') == '0':
                continue
            video_id = match.group('id')
            video_title = unescapeHTML(match.group('title')) if 'title' in groups else None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                # The "Play all" link is not a real video title
                video_title = None
            try:
                pos = ids_in_page.index(video_id)
            except ValueError:
                # First occurrence of this ID
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            else:
                # Duplicate ID: keep the first entry but fill a missing title
                if video_title and not titles_in_page[pos]:
                    titles_in_page[pos] = video_title

    def extract_videos_from_page(self, page):
        """Return an iterator of (video_id, title) pairs found on *page*."""
        ids_in_page, titles_in_page = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)
361
362
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a url_result for every distinct playlist linked on *content*."""
        playlist_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(playlist_ids):
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page and return a playlist of the playlists it links."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage, fatal=False)
        return self.playlist_result(
            self._entries(webpage, playlist_id), playlist_id, title)
376
377
378 class YoutubeIE(YoutubeBaseInfoExtractor):
379 IE_DESC = 'YouTube.com'
380 _VALID_URL = r"""(?x)^
381 (
382 (?:https?://|//) # http(s):// or protocol-independent URL
383 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
384 (?:www\.)?deturl\.com/www\.youtube\.com/|
385 (?:www\.)?pwnyoutube\.com/|
386 (?:www\.)?hooktube\.com/|
387 (?:www\.)?yourepeat\.com/|
388 tube\.majestyc\.net/|
389 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
390 (?:(?:www|dev)\.)?invidio\.us/|
391 (?:(?:www|no)\.)?invidiou\.sh/|
392 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
393 (?:www\.)?invidious\.kabi\.tk/|
394 (?:www\.)?invidious\.13ad\.de/|
395 (?:www\.)?invidious\.mastodon\.host/|
396 (?:www\.)?invidious\.nixnet\.xyz/|
397 (?:www\.)?invidious\.drycat\.fr/|
398 (?:www\.)?tube\.poal\.co/|
399 (?:www\.)?vid\.wxzm\.sx/|
400 (?:www\.)?yewtu\.be/|
401 (?:www\.)?yt\.elukerio\.org/|
402 (?:www\.)?yt\.lelux\.fi/|
403 (?:www\.)?invidious\.ggc-project\.de/|
404 (?:www\.)?yt\.maisputain\.ovh/|
405 (?:www\.)?invidious\.13ad\.de/|
406 (?:www\.)?invidious\.toot\.koeln/|
407 (?:www\.)?invidious\.fdn\.fr/|
408 (?:www\.)?watch\.nettohikari\.com/|
409 (?:www\.)?kgg2m7yk5aybusll\.onion/|
410 (?:www\.)?qklhadlycap4cnod\.onion/|
411 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
412 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
413 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
414 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
415 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
416 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
417 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
418 (?:.*?\#/)? # handle anchor (#/) redirect urls
419 (?: # the various things that can precede the ID:
420 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
421 |(?: # or the v= param in all its forms
422 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
423 (?:\?|\#!?) # the params delimiter ? or # or #!
424 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
425 v=
426 )
427 ))
428 |(?:
429 youtu\.be| # just youtu.be/xxxx
430 vid\.plus| # or vid.plus/xxxx
431 zwearz\.com/watch| # or zwearz.com/watch/xxxx
432 )/
433 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
434 )
435 )? # all until now is optional -> you can pass the naked ID
436 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
437 (?!.*?\blist=
438 (?:
439 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
440 WL # WL are handled by the watch later IE
441 )
442 )
443 (?(1).+)? # if we found the ID, everything can follow
444 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
445 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
446 _PLAYER_INFO_RE = (
447 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
448 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
449 )
450 _formats = {
451 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
452 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
453 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
454 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
455 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
456 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
457 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
458 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
459 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
460 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
461 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
462 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
463 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
464 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
465 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
466 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
467 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
468 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
469
470
471 # 3D videos
472 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
473 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
474 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
475 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
476 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
477 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
478 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
479
480 # Apple HTTP Live Streaming
481 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
482 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
483 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
484 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
485 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
486 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
487 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
488 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
489
490 # DASH mp4 video
491 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
494 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
497 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
498 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
499 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
500 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
501 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
502 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
503
504 # Dash mp4 audio
505 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
506 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
507 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
508 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
509 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
510 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
511 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
512
513 # Dash webm
514 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
517 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
518 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
519 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
520 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
521 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
527 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
528 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
529 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
530 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
532 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
533 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
534 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
536
537 # Dash webm audio
538 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
539 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
540
541 # Dash webm audio with opus inside
542 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
543 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
544 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
545
546 # RTMP (unnamed)
547 '_rtmp': {'protocol': 'rtmp'},
548
549 # av01 video only formats sometimes served with "unknown" codecs
550 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
552 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
553 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
554 }
555 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
556
557 _GEO_BYPASS = False
558
559 IE_NAME = 'youtube'
560 _TESTS = [
561 {
562 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
563 'info_dict': {
564 'id': 'BaW_jenozKc',
565 'ext': 'mp4',
566 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
567 'uploader': 'Philipp Hagemeister',
568 'uploader_id': 'phihag',
569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
570 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
571 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
572 'upload_date': '20121002',
573 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
574 'categories': ['Science & Technology'],
575 'tags': ['youtube-dl'],
576 'duration': 10,
577 'view_count': int,
578 'like_count': int,
579 'dislike_count': int,
580 'start_time': 1,
581 'end_time': 9,
582 }
583 },
584 {
585 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
586 'note': 'Embed-only video (#1746)',
587 'info_dict': {
588 'id': 'yZIXLfi8CZQ',
589 'ext': 'mp4',
590 'upload_date': '20120608',
591 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
592 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
593 'uploader': 'SET India',
594 'uploader_id': 'setindia',
595 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
596 'age_limit': 18,
597 }
598 },
599 {
600 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
601 'note': 'Use the first video ID in the URL',
602 'info_dict': {
603 'id': 'BaW_jenozKc',
604 'ext': 'mp4',
605 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
606 'uploader': 'Philipp Hagemeister',
607 'uploader_id': 'phihag',
608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
609 'upload_date': '20121002',
610 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
611 'categories': ['Science & Technology'],
612 'tags': ['youtube-dl'],
613 'duration': 10,
614 'view_count': int,
615 'like_count': int,
616 'dislike_count': int,
617 },
618 'params': {
619 'skip_download': True,
620 },
621 },
622 {
623 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
624 'note': '256k DASH audio (format 141) via DASH manifest',
625 'info_dict': {
626 'id': 'a9LDPn-MO4I',
627 'ext': 'm4a',
628 'upload_date': '20121002',
629 'uploader_id': '8KVIDEO',
630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
631 'description': '',
632 'uploader': '8KVIDEO',
633 'title': 'UHDTV TEST 8K VIDEO.mp4'
634 },
635 'params': {
636 'youtube_include_dash_manifest': True,
637 'format': '141',
638 },
639 'skip': 'format 141 not served anymore',
640 },
641 # Controversy video
642 {
643 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
644 'info_dict': {
645 'id': 'T4XJQO3qol8',
646 'ext': 'mp4',
647 'duration': 219,
648 'upload_date': '20100909',
649 'uploader': 'Amazing Atheist',
650 'uploader_id': 'TheAmazingAtheist',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
652 'title': 'Burning Everyone\'s Koran',
653 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
654 }
655 },
656 # Normal age-gate video (embed allowed)
657 {
658 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
659 'info_dict': {
660 'id': 'HtVdAasjOgU',
661 'ext': 'mp4',
662 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
663 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
664 'duration': 142,
665 'uploader': 'The Witcher',
666 'uploader_id': 'WitcherGame',
667 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
668 'upload_date': '20140605',
669 'age_limit': 18,
670 },
671 },
672 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
673 {
674 'url': 'lqQg6PlCWgI',
675 'info_dict': {
676 'id': 'lqQg6PlCWgI',
677 'ext': 'mp4',
678 'duration': 6085,
679 'upload_date': '20150827',
680 'uploader_id': 'olympic',
681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
682 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
683 'uploader': 'Olympic',
684 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
685 },
686 'params': {
687 'skip_download': 'requires avconv',
688 }
689 },
690 # Non-square pixels
691 {
692 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
693 'info_dict': {
694 'id': '_b-2C3KPAM0',
695 'ext': 'mp4',
696 'stretched_ratio': 16 / 9.,
697 'duration': 85,
698 'upload_date': '20110310',
699 'uploader_id': 'AllenMeow',
700 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
701 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
702 'uploader': '孫ᄋᄅ',
703 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
704 },
705 },
706 # url_encoded_fmt_stream_map is empty string
707 {
708 'url': 'qEJwOuvDf7I',
709 'info_dict': {
710 'id': 'qEJwOuvDf7I',
711 'ext': 'webm',
712 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
713 'description': '',
714 'upload_date': '20150404',
715 'uploader_id': 'spbelect',
716 'uploader': 'Наблюдатели Петербурга',
717 },
718 'params': {
719 'skip_download': 'requires avconv',
720 },
721 'skip': 'This live event has ended.',
722 },
723 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
724 {
725 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
726 'info_dict': {
727 'id': 'FIl7x6_3R5Y',
728 'ext': 'webm',
729 'title': 'md5:7b81415841e02ecd4313668cde88737a',
730 'description': 'md5:116377fd2963b81ec4ce64b542173306',
731 'duration': 220,
732 'upload_date': '20150625',
733 'uploader_id': 'dorappi2000',
734 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
735 'uploader': 'dorappi2000',
736 'formats': 'mincount:31',
737 },
738 'skip': 'not actual anymore',
739 },
740 # DASH manifest with segment_list
741 {
742 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
743 'md5': '8ce563a1d667b599d21064e982ab9e31',
744 'info_dict': {
745 'id': 'CsmdDsKjzN8',
746 'ext': 'mp4',
747 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
748 'uploader': 'Airtek',
749 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
750 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
751 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
752 },
753 'params': {
754 'youtube_include_dash_manifest': True,
755 'format': '135', # bestvideo
756 },
757 'skip': 'This live event has ended.',
758 },
759 {
760 # Multifeed videos (multiple cameras), URL is for Main Camera
761 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
762 'info_dict': {
763 'id': 'jqWvoWXjCVs',
764 'title': 'teamPGP: Rocket League Noob Stream',
765 'description': 'md5:dc7872fb300e143831327f1bae3af010',
766 },
767 'playlist': [{
768 'info_dict': {
769 'id': 'jqWvoWXjCVs',
770 'ext': 'mp4',
771 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
772 'description': 'md5:dc7872fb300e143831327f1bae3af010',
773 'duration': 7335,
774 'upload_date': '20150721',
775 'uploader': 'Beer Games Beer',
776 'uploader_id': 'beergamesbeer',
777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
778 'license': 'Standard YouTube License',
779 },
780 }, {
781 'info_dict': {
782 'id': '6h8e8xoXJzg',
783 'ext': 'mp4',
784 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
785 'description': 'md5:dc7872fb300e143831327f1bae3af010',
786 'duration': 7337,
787 'upload_date': '20150721',
788 'uploader': 'Beer Games Beer',
789 'uploader_id': 'beergamesbeer',
790 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
791 'license': 'Standard YouTube License',
792 },
793 }, {
794 'info_dict': {
795 'id': 'PUOgX5z9xZw',
796 'ext': 'mp4',
797 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
798 'description': 'md5:dc7872fb300e143831327f1bae3af010',
799 'duration': 7337,
800 'upload_date': '20150721',
801 'uploader': 'Beer Games Beer',
802 'uploader_id': 'beergamesbeer',
803 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
804 'license': 'Standard YouTube License',
805 },
806 }, {
807 'info_dict': {
808 'id': 'teuwxikvS5k',
809 'ext': 'mp4',
810 'title': 'teamPGP: Rocket League Noob Stream (zim)',
811 'description': 'md5:dc7872fb300e143831327f1bae3af010',
812 'duration': 7334,
813 'upload_date': '20150721',
814 'uploader': 'Beer Games Beer',
815 'uploader_id': 'beergamesbeer',
816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
817 'license': 'Standard YouTube License',
818 },
819 }],
820 'params': {
821 'skip_download': True,
822 },
823 'skip': 'This video is not available.',
824 },
825 {
826 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
827 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
828 'info_dict': {
829 'id': 'gVfLd0zydlo',
830 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
831 },
832 'playlist_count': 2,
833 'skip': 'Not multifeed anymore',
834 },
835 {
836 'url': 'https://vid.plus/FlRa-iH7PGw',
837 'only_matching': True,
838 },
839 {
840 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
841 'only_matching': True,
842 },
843 {
844 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
845 # Also tests cut-off URL expansion in video description (see
846 # https://github.com/ytdl-org/youtube-dl/issues/1892,
847 # https://github.com/ytdl-org/youtube-dl/issues/8164)
848 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
849 'info_dict': {
850 'id': 'lsguqyKfVQg',
851 'ext': 'mp4',
852 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
853 'alt_title': 'Dark Walk - Position Music',
854 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
855 'duration': 133,
856 'upload_date': '20151119',
857 'uploader_id': 'IronSoulElf',
858 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
859 'uploader': 'IronSoulElf',
860 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
861 'track': 'Dark Walk - Position Music',
862 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
863 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
864 },
865 'params': {
866 'skip_download': True,
867 },
868 },
869 {
870 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
871 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
872 'only_matching': True,
873 },
874 {
875 # Video with yt:stretch=17:0
876 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
877 'info_dict': {
878 'id': 'Q39EVAstoRM',
879 'ext': 'mp4',
880 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
881 'description': 'md5:ee18a25c350637c8faff806845bddee9',
882 'upload_date': '20151107',
883 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
884 'uploader': 'CH GAMER DROID',
885 },
886 'params': {
887 'skip_download': True,
888 },
889 'skip': 'This video does not exist.',
890 },
891 {
892 # Video licensed under Creative Commons
893 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
894 'info_dict': {
895 'id': 'M4gD1WSo5mA',
896 'ext': 'mp4',
897 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
898 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
899 'duration': 721,
900 'upload_date': '20150127',
901 'uploader_id': 'BerkmanCenter',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
903 'uploader': 'The Berkman Klein Center for Internet & Society',
904 'license': 'Creative Commons Attribution license (reuse allowed)',
905 },
906 'params': {
907 'skip_download': True,
908 },
909 },
910 {
911 # Channel-like uploader_url
912 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
913 'info_dict': {
914 'id': 'eQcmzGIKrzg',
915 'ext': 'mp4',
916 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
917 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
918 'duration': 4060,
919 'upload_date': '20151119',
920 'uploader': 'Bernie Sanders',
921 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
922 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
923 'license': 'Creative Commons Attribution license (reuse allowed)',
924 },
925 'params': {
926 'skip_download': True,
927 },
928 },
929 {
930 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
931 'only_matching': True,
932 },
933 {
934 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
935 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
936 'only_matching': True,
937 },
938 {
939 # Rental video preview
940 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
941 'info_dict': {
942 'id': 'uGpuVWrhIzE',
943 'ext': 'mp4',
944 'title': 'Piku - Trailer',
945 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
946 'upload_date': '20150811',
947 'uploader': 'FlixMatrix',
948 'uploader_id': 'FlixMatrixKaravan',
949 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
950 'license': 'Standard YouTube License',
951 },
952 'params': {
953 'skip_download': True,
954 },
955 'skip': 'This video is not available.',
956 },
957 {
958 # YouTube Red video with episode data
959 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
960 'info_dict': {
961 'id': 'iqKdEhx-dD4',
962 'ext': 'mp4',
963 'title': 'Isolation - Mind Field (Ep 1)',
964 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
965 'duration': 2085,
966 'upload_date': '20170118',
967 'uploader': 'Vsauce',
968 'uploader_id': 'Vsauce',
969 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
970 'series': 'Mind Field',
971 'season_number': 1,
972 'episode_number': 1,
973 },
974 'params': {
975 'skip_download': True,
976 },
977 'expected_warnings': [
978 'Skipping DASH manifest',
979 ],
980 },
981 {
982 # The following content has been identified by the YouTube community
983 # as inappropriate or offensive to some audiences.
984 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
985 'info_dict': {
986 'id': '6SJNVb0GnPI',
987 'ext': 'mp4',
988 'title': 'Race Differences in Intelligence',
989 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
990 'duration': 965,
991 'upload_date': '20140124',
992 'uploader': 'New Century Foundation',
993 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
994 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
995 },
996 'params': {
997 'skip_download': True,
998 },
999 },
1000 {
1001 # itag 212
1002 'url': '1t24XAntNCY',
1003 'only_matching': True,
1004 },
1005 {
1006 # geo restricted to JP
1007 'url': 'sJL6WA-aGkQ',
1008 'only_matching': True,
1009 },
1010 {
1011 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1012 'only_matching': True,
1013 },
1014 {
1015 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1016 'only_matching': True,
1017 },
1018 {
1019 # DRM protected
1020 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1021 'only_matching': True,
1022 },
1023 {
1024 # Video with unsupported adaptive stream type formats
1025 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1026 'info_dict': {
1027 'id': 'Z4Vy8R84T1U',
1028 'ext': 'mp4',
1029 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1030 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1031 'duration': 433,
1032 'upload_date': '20130923',
1033 'uploader': 'Amelia Putri Harwita',
1034 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1035 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1036 'formats': 'maxcount:10',
1037 },
1038 'params': {
1039 'skip_download': True,
1040 'youtube_include_dash_manifest': False,
1041 },
1042 'skip': 'not actual anymore',
1043 },
1044 {
1045 # Youtube Music Auto-generated description
1046 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1047 'info_dict': {
1048 'id': 'MgNrAu2pzNs',
1049 'ext': 'mp4',
1050 'title': 'Voyeur Girl',
1051 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1052 'upload_date': '20190312',
1053 'uploader': 'Stephen - Topic',
1054 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1055 'artist': 'Stephen',
1056 'track': 'Voyeur Girl',
1057 'album': 'it\'s too much love to know my dear',
1058 'release_date': '20190313',
1059 'release_year': 2019,
1060 },
1061 'params': {
1062 'skip_download': True,
1063 },
1064 },
1065 {
1066 # Youtube Music Auto-generated description
1067 # Retrieve 'artist' field from 'Artist:' in video description
1068 # when it is present on youtube music video
1069 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1070 'info_dict': {
1071 'id': 'k0jLE7tTwjY',
1072 'ext': 'mp4',
1073 'title': 'Latch Feat. Sam Smith',
1074 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1075 'upload_date': '20150110',
1076 'uploader': 'Various Artists - Topic',
1077 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1078 'artist': 'Disclosure',
1079 'track': 'Latch Feat. Sam Smith',
1080 'album': 'Latch Featuring Sam Smith',
1081 'release_date': '20121008',
1082 'release_year': 2012,
1083 },
1084 'params': {
1085 'skip_download': True,
1086 },
1087 },
1088 {
1089 # Youtube Music Auto-generated description
1090 # handle multiple artists on youtube music video
1091 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1092 'info_dict': {
1093 'id': '74qn0eJSjpA',
1094 'ext': 'mp4',
1095 'title': 'Eastside',
1096 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1097 'upload_date': '20180710',
1098 'uploader': 'Benny Blanco - Topic',
1099 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1100 'artist': 'benny blanco, Halsey, Khalid',
1101 'track': 'Eastside',
1102 'album': 'Eastside',
1103 'release_date': '20180713',
1104 'release_year': 2018,
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
1109 },
1110 {
1111 # Youtube Music Auto-generated description
1112 # handle youtube music video with release_year and no release_date
1113 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1114 'info_dict': {
1115 'id': '-hcAI0g-f5M',
1116 'ext': 'mp4',
1117 'title': 'Put It On Me',
1118 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1119 'upload_date': '20180426',
1120 'uploader': 'Matt Maeson - Topic',
1121 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1122 'artist': 'Matt Maeson',
1123 'track': 'Put It On Me',
1124 'album': 'The Hearse',
1125 'release_date': None,
1126 'release_year': 2018,
1127 },
1128 'params': {
1129 'skip_download': True,
1130 },
1131 },
1132 {
1133 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1134 'only_matching': True,
1135 },
1136 {
1137 # invalid -> valid video id redirection
1138 'url': 'DJztXj2GPfl',
1139 'info_dict': {
1140 'id': 'DJztXj2GPfk',
1141 'ext': 'mp4',
1142 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1143 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1144 'upload_date': '20090125',
1145 'uploader': 'Prochorowka',
1146 'uploader_id': 'Prochorowka',
1147 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1148 'artist': 'Panjabi MC',
1149 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1150 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1151 },
1152 'params': {
1153 'skip_download': True,
1154 },
1155 },
1156 {
1157 # empty description results in an empty string
1158 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1159 'info_dict': {
1160 'id': 'x41yOUIvK2k',
1161 'ext': 'mp4',
1162 'title': 'IMG 3456',
1163 'description': '',
1164 'upload_date': '20170613',
1165 'uploader_id': 'ElevageOrVert',
1166 'uploader': 'ElevageOrVert',
1167 },
1168 'params': {
1169 'skip_download': True,
1170 },
1171 },
1172 ]
1173
1174 def __init__(self, *args, **kwargs):
1175 super(YoutubeIE, self).__init__(*args, **kwargs)
1176 self._player_cache = {}
1177
1178 def report_video_info_webpage_download(self, video_id):
1179 """Report attempt to download video info webpage."""
1180 self.to_screen('%s: Downloading video info webpage' % video_id)
1181
1182 def report_information_extraction(self, video_id):
1183 """Report attempt to extract video information."""
1184 self.to_screen('%s: Extracting video information' % video_id)
1185
1186 def report_unavailable_format(self, video_id, format):
1187 """Report extracted video URL."""
1188 self.to_screen('%s: Format %s not available' % (video_id, format))
1189
1190 def report_rtmp_download(self):
1191 """Indicate the download will use the RTMP protocol."""
1192 self.to_screen('RTMP download detected')
1193
1194 def _signature_cache_id(self, example_sig):
1195 """ Return a string representation of a signature """
1196 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1197
1198 @classmethod
1199 def _extract_player_info(cls, player_url):
1200 for player_re in cls._PLAYER_INFO_RE:
1201 id_m = re.search(player_re, player_url)
1202 if id_m:
1203 break
1204 else:
1205 raise ExtractorError('Cannot identify player %r' % player_url)
1206 return id_m.group('ext'), id_m.group('id')
1207
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (and cache) the signature-decryption function for a player.

        Downloads the JS or SWF player referenced by player_url, extracts
        its signature-scrambling routine and returns a callable mapping an
        encrypted signature string to the decrypted one.  The computed
        character permutation is stored in the filesystem cache keyed by
        player id and the signature's length layout, so later runs can skip
        the player download.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        # func_id doubles as the cache key/filename; the basename assert
        # guards against path separators sneaking into it.
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices describing a fixed
            # permutation of the signature characters.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Derive the permutation by running the extracted function on a
        # probe string of distinct characters; the caching scheme assumes
        # the function only reorders/drops characters.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1247
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs func on a probe string, recovers the character permutation it
        performs and renders it as readable index/slice expressions (used
        by the youtube_print_sig_code debugging option).
        """
        def gen_sig_code(idxs):
            # Yield 's[i]' terms, compressing runs with stride +/-1 into
            # slice expressions.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                # end is inclusive here; extend by step to get the Python
                # exclusive slice bound (':' when it would go negative).
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it, or flush the finished slice.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two adjacent indices start a new sliceable run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the final open run.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1286
    def _parse_sig_js(self, jscode):
        """Locate the signature-scrambling function in the player JS.

        Returns a callable mapping an encrypted signature string to its
        decrypted form, executed through the bundled JS interpreter.
        """
        # The function name is found by matching known call-site shapes in
        # the player code; current patterns come first, obsolete ones are
        # kept as fallbacks for older players.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The extracted JS function takes its argument list as a sequence.
        return lambda s: initial_function([s])
1307
1308 def _parse_sig_swf(self, file_contents):
1309 swfi = SWFInterpreter(file_contents)
1310 TARGET_CLASSNAME = 'SignatureDecipher'
1311 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1312 initial_function = swfi.extract_function(searched_class, 'decipher')
1313 return lambda s: initial_function([s])
1314
1315 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1316 """Turn the encrypted s field into a working signature"""
1317
1318 if player_url is None:
1319 raise ExtractorError('Cannot decrypt signature without player_url')
1320
1321 if player_url.startswith('//'):
1322 player_url = 'https:' + player_url
1323 elif not re.match(r'https?://', player_url):
1324 player_url = compat_urlparse.urljoin(
1325 'https://www.youtube.com', player_url)
1326 try:
1327 player_id = (player_url, self._signature_cache_id(s))
1328 if player_id not in self._player_cache:
1329 func = self._extract_signature_function(
1330 video_id, player_url, s
1331 )
1332 self._player_cache[player_id] = func
1333 func = self._player_cache[player_id]
1334 if self._downloader.params.get('youtube_print_sig_code'):
1335 self._print_sig_code(func, s)
1336 return func(s)
1337 except Exception as e:
1338 tb = traceback.format_exc()
1339 raise ExtractorError(
1340 'Signature extraction failed: ' + tb, cause=e)
1341
1342 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1343 try:
1344 subs_doc = self._download_xml(
1345 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1346 video_id, note=False)
1347 except ExtractorError as err:
1348 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1349 return {}
1350
1351 sub_lang_list = {}
1352 for track in subs_doc.findall('track'):
1353 lang = track.attrib['lang_code']
1354 if lang in sub_lang_list:
1355 continue
1356 sub_formats = []
1357 for ext in self._SUBTITLE_FORMATS:
1358 params = compat_urllib_parse_urlencode({
1359 'lang': lang,
1360 'v': video_id,
1361 'fmt': ext,
1362 'name': track.attrib['name'].encode('utf-8'),
1363 })
1364 sub_formats.append({
1365 'url': 'https://www.youtube.com/api/timedtext?' + params,
1366 'ext': ext,
1367 })
1368 sub_lang_list[lang] = sub_formats
1369 if has_live_chat_replay:
1370 sub_lang_list['live_chat'] = [
1371 {
1372 'video_id': video_id,
1373 'ext': 'json',
1374 'protocol': 'youtube_live_chat_replay',
1375 },
1376 ]
1377 if not sub_lang_list:
1378 self._downloader.report_warning('video doesn\'t have subtitles')
1379 return {}
1380 return sub_lang_list
1381
1382 def _get_ytplayer_config(self, video_id, webpage):
1383 patterns = (
1384 # User data may contain arbitrary character sequences that may affect
1385 # JSON extraction with regex, e.g. when '};' is contained the second
1386 # regex won't capture the whole JSON. Yet working around by trying more
1387 # concrete regex first keeping in mind proper quoted string handling
1388 # to be implemented in future that will replace this workaround (see
1389 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1390 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1391 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1392 r';ytplayer\.config\s*=\s*({.+?});',
1393 )
1394 config = self._search_regex(
1395 patterns, webpage, 'ytplayer.config', default=None)
1396 if config:
1397 return self._parse_json(
1398 uppercase_escape(config), video_id, fatal=False)
1399
1400 def _get_yt_initial_data(self, video_id, webpage):
1401 config = self._search_regex(
1402 (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
1403 r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
1404 webpage, 'ytInitialData', default=None)
1405 if config:
1406 return self._parse_json(
1407 uppercase_escape(config), video_id, fatal=False)
1408
    def _get_automatic_captions(self, video_id, webpage):
        """Return automatic (ASR/translated) captions as {lang: [formats]}.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.  Three extraction paths are tried
        in order: the legacy ttsurl listing, the player_response captions
        renderer (format as of 22.06.2017), and the obsolete caption_tracks
        args.  Returns {} (after a warning) when nothing can be extracted.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One format entry per (target language, subtitle format),
                # all derived from the single caption_url.
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Expand one caption base URL into per-language, per-format
                # entries by rewriting its query string.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    caption_tracks = renderer['captionTracks']
                    for caption_track in caption_tracks:
                        if 'kind' not in caption_track:
                            # not an automatic transcription
                            continue
                        base_url = caption_track['baseUrl']
                        sub_lang_list = []
                        for lang in renderer['translationLanguages']:
                            lang_code = lang.get('languageCode')
                            if lang_code:
                                sub_lang_list.append(lang_code)
                        # Only the first automatic track is used.
                        return make_captions(base_url, sub_lang_list)

                    self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
                    return {}
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1517
1518 def _mark_watched(self, video_id, video_info, player_response):
1519 playback_url = url_or_none(try_get(
1520 player_response,
1521 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1522 video_info, lambda x: x['videostats_playback_base_url'][0]))
1523 if not playback_url:
1524 return
1525 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1526 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1527
1528 # cpn generation algorithm is reverse engineered from base.js.
1529 # In fact it works even with dummy cpn.
1530 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1531 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1532
1533 qs.update({
1534 'ver': ['2'],
1535 'cpn': [cpn],
1536 })
1537 playback_url = compat_urlparse.urlunparse(
1538 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1539
1540 self._download_webpage(
1541 playback_url, video_id, 'Marking watched',
1542 'Unable to mark watched', fatal=False)
1543
1544 @staticmethod
1545 def _extract_urls(webpage):
1546 # Embedded YouTube player
1547 entries = [
1548 unescapeHTML(mobj.group('url'))
1549 for mobj in re.finditer(r'''(?x)
1550 (?:
1551 <iframe[^>]+?src=|
1552 data-video-url=|
1553 <embed[^>]+?src=|
1554 embedSWF\(?:\s*|
1555 <object[^>]+data=|
1556 new\s+SWFObject\(
1557 )
1558 (["\'])
1559 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1560 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1561 \1''', webpage)]
1562
1563 # lazyYT YouTube embed
1564 entries.extend(list(map(
1565 unescapeHTML,
1566 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1567
1568 # Wordpress "YouTube Video Importer" plugin
1569 matches = re.findall(r'''(?x)<div[^>]+
1570 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1571 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1572 entries.extend(m[-1] for m in matches)
1573
1574 return entries
1575
1576 @staticmethod
1577 def _extract_url(webpage):
1578 urls = YoutubeIE._extract_urls(webpage)
1579 return urls[0] if urls else None
1580
1581 @classmethod
1582 def extract_id(cls, url):
1583 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1584 if mobj is None:
1585 raise ExtractorError('Invalid URL: %s' % url)
1586 video_id = mobj.group(2)
1587 return video_id
1588
1589 def _extract_chapters_from_json(self, webpage, video_id, duration):
1590 if not webpage:
1591 return
1592 initial_data = self._parse_json(
1593 self._search_regex(
1594 r'window\["ytInitialData"\] = (.+);\n', webpage,
1595 'player args', default='{}'),
1596 video_id, fatal=False)
1597 if not initial_data or not isinstance(initial_data, dict):
1598 return
1599 chapters_list = try_get(
1600 initial_data,
1601 lambda x: x['playerOverlays']
1602 ['playerOverlayRenderer']
1603 ['decoratedPlayerBarRenderer']
1604 ['decoratedPlayerBarRenderer']
1605 ['playerBar']
1606 ['chapteredPlayerBarRenderer']
1607 ['chapters'],
1608 list)
1609 if not chapters_list:
1610 return
1611
1612 def chapter_time(chapter):
1613 return float_or_none(
1614 try_get(
1615 chapter,
1616 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1617 int),
1618 scale=1000)
1619 chapters = []
1620 for next_num, chapter in enumerate(chapters_list, start=1):
1621 start_time = chapter_time(chapter)
1622 if start_time is None:
1623 continue
1624 end_time = (chapter_time(chapters_list[next_num])
1625 if next_num < len(chapters_list) else duration)
1626 if end_time is None:
1627 continue
1628 title = try_get(
1629 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1630 compat_str)
1631 chapters.append({
1632 'start_time': start_time,
1633 'end_time': end_time,
1634 'title': title,
1635 })
1636 return chapters
1637
    @staticmethod
    def _extract_chapters_from_description(description, duration):
        """Parse chapter markers out of seekTo links in the video description.

        Returns a list of {start_time, end_time, title} dicts, or None when
        the description contains no recognizable timestamp lines.
        """
        if not description:
            return None
        # Each match is (full line, timestamp text) for description lines
        # containing a yt.www.watch.player.seekTo anchor.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
            description)
        if not chapter_lines:
            return None
        chapters = []
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
                continue
            if start_time > duration:
                # A start beyond the video length ends the chapter list.
                break
            # This chapter ends where the next timestamp starts; the last
            # one ends at the video duration.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
                continue
            if end_time > duration:
                end_time = duration
            if start_time > end_time:
                break
            # Strip the seekTo anchor markup and tidy whitespace/dashes to
            # obtain the chapter title.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
            })
        return chapters
1672
1673 def _extract_chapters(self, webpage, description, video_id, duration):
1674 return (self._extract_chapters_from_json(webpage, video_id, duration)
1675 or self._extract_chapters_from_description(description, duration))
1676
1677 def _real_extract(self, url):
1678 url, smuggled_data = unsmuggle_url(url, {})
1679
1680 proto = (
1681 'http' if self._downloader.params.get('prefer_insecure', False)
1682 else 'https')
1683
1684 start_time = None
1685 end_time = None
1686 parsed_url = compat_urllib_parse_urlparse(url)
1687 for component in [parsed_url.fragment, parsed_url.query]:
1688 query = compat_parse_qs(component)
1689 if start_time is None and 't' in query:
1690 start_time = parse_duration(query['t'][0])
1691 if start_time is None and 'start' in query:
1692 start_time = parse_duration(query['start'][0])
1693 if end_time is None and 'end' in query:
1694 end_time = parse_duration(query['end'][0])
1695
1696 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1697 mobj = re.search(self._NEXT_URL_RE, url)
1698 if mobj:
1699 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1700 video_id = self.extract_id(url)
1701
1702 # Get video webpage
1703 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1704 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1705
1706 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1707 video_id = qs.get('v', [None])[0] or video_id
1708
1709 # Attempt to extract SWF player URL
1710 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1711 if mobj is not None:
1712 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1713 else:
1714 player_url = None
1715
1716 dash_mpds = []
1717
1718 def add_dash_mpd(video_info):
1719 dash_mpd = video_info.get('dashmpd')
1720 if dash_mpd and dash_mpd[0] not in dash_mpds:
1721 dash_mpds.append(dash_mpd[0])
1722
1723 def add_dash_mpd_pr(pl_response):
1724 dash_mpd = url_or_none(try_get(
1725 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1726 compat_str))
1727 if dash_mpd and dash_mpd not in dash_mpds:
1728 dash_mpds.append(dash_mpd)
1729
1730 is_live = None
1731 view_count = None
1732
1733 def extract_view_count(v_info):
1734 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1735
1736 def extract_player_response(player_response, video_id):
1737 pl_response = str_or_none(player_response)
1738 if not pl_response:
1739 return
1740 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1741 if isinstance(pl_response, dict):
1742 add_dash_mpd_pr(pl_response)
1743 return pl_response
1744
1745 def extract_embedded_config(embed_webpage, video_id):
1746 embedded_config = self._search_regex(
1747 r'setConfig\(({.*})\);',
1748 embed_webpage, 'ytInitialData', default=None)
1749 if embedded_config:
1750 return embedded_config
1751
1752 player_response = {}
1753
1754 # Get video info
1755 video_info = {}
1756 embed_webpage = None
1757 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1758 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1759 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1760 age_gate = True
1761 # We simulate the access to the video from www.youtube.com/v/{video_id}
1762 # this can be viewed without login into Youtube
1763 url = proto + '://www.youtube.com/embed/%s' % video_id
1764 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1765 ext = extract_embedded_config(embed_webpage, video_id)
1766 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1767 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1768 if not playable_in_embed:
1769 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1770 playable_in_embed = ''
1771 else:
1772 playable_in_embed = playable_in_embed.group('playableinEmbed')
1773 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1774 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1775 if playable_in_embed == 'false':
1776 '''
1777 # TODO apply this patch when Support for Python 2.6(!) and above drops
1778 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1779 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1780 '''
1781 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1782 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1783 age_gate = False
1784 # Try looking directly into the video webpage
1785 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1786 if ytplayer_config:
1787 args = ytplayer_config['args']
1788 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1789 # Convert to the same format returned by compat_parse_qs
1790 video_info = dict((k, [v]) for k, v in args.items())
1791 add_dash_mpd(video_info)
1792 # Rental video is not rented but preview is available (e.g.
1793 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1794 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1795 if not video_info and args.get('ypc_vid'):
1796 return self.url_result(
1797 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1798 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1799 is_live = True
1800 if not player_response:
1801 player_response = extract_player_response(args.get('player_response'), video_id)
1802 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1803 add_dash_mpd_pr(player_response)
1804 else:
1805 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1806 else:
1807 data = compat_urllib_parse_urlencode({
1808 'video_id': video_id,
1809 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1810 'sts': self._search_regex(
1811 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1812 })
1813 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1814 try:
1815 video_info_webpage = self._download_webpage(
1816 video_info_url, video_id,
1817 note='Refetching age-gated info webpage',
1818 errnote='unable to download video info webpage')
1819 except ExtractorError:
1820 video_info_webpage = None
1821 if video_info_webpage:
1822 video_info = compat_parse_qs(video_info_webpage)
1823 pl_response = video_info.get('player_response', [None])[0]
1824 player_response = extract_player_response(pl_response, video_id)
1825 add_dash_mpd(video_info)
1826 view_count = extract_view_count(video_info)
1827 else:
1828 age_gate = False
1829 # Try looking directly into the video webpage
1830 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1831 if ytplayer_config:
1832 args = ytplayer_config['args']
1833 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1834 # Convert to the same format returned by compat_parse_qs
1835 video_info = dict((k, [v]) for k, v in args.items())
1836 add_dash_mpd(video_info)
1837 # Rental video is not rented but preview is available (e.g.
1838 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1839 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1840 if not video_info and args.get('ypc_vid'):
1841 return self.url_result(
1842 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1843 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1844 is_live = True
1845 if not player_response:
1846 player_response = extract_player_response(args.get('player_response'), video_id)
1847 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1848 add_dash_mpd_pr(player_response)
1849
1850 def extract_unavailable_message():
1851 messages = []
1852 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1853 msg = self._html_search_regex(
1854 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1855 video_webpage, 'unavailable %s' % kind, default=None)
1856 if msg:
1857 messages.append(msg)
1858 if messages:
1859 return '\n'.join(messages)
1860
1861 if not video_info and not player_response:
1862 unavailable_message = extract_unavailable_message()
1863 if not unavailable_message:
1864 unavailable_message = 'Unable to extract video data'
1865 raise ExtractorError(
1866 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1867
1868 if not isinstance(video_info, dict):
1869 video_info = {}
1870
1871 video_details = try_get(
1872 player_response, lambda x: x['videoDetails'], dict) or {}
1873
1874 microformat = try_get(
1875 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1876
1877 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1878 if not video_title:
1879 self._downloader.report_warning('Unable to extract video title')
1880 video_title = '_'
1881
1882 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1883 if video_description:
1884
1885 def replace_url(m):
1886 redir_url = compat_urlparse.urljoin(url, m.group(1))
1887 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1888 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1889 qs = compat_parse_qs(parsed_redir_url.query)
1890 q = qs.get('q')
1891 if q and q[0]:
1892 return q[0]
1893 return redir_url
1894
1895 description_original = video_description = re.sub(r'''(?x)
1896 <a\s+
1897 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1898 (?:title|href)="([^"]+)"\s+
1899 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1900 class="[^"]*"[^>]*>
1901 [^<]+\.{3}\s*
1902 </a>
1903 ''', replace_url, video_description)
1904 video_description = clean_html(video_description)
1905 else:
1906 video_description = video_details.get('shortDescription')
1907 if video_description is None:
1908 video_description = self._html_search_meta('description', video_webpage)
1909
1910 if not smuggled_data.get('force_singlefeed', False):
1911 if not self._downloader.params.get('noplaylist'):
1912 multifeed_metadata_list = try_get(
1913 player_response,
1914 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1915 compat_str) or try_get(
1916 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1917 if multifeed_metadata_list:
1918 entries = []
1919 feed_ids = []
1920 for feed in multifeed_metadata_list.split(','):
1921 # Unquote should take place before split on comma (,) since textual
1922 # fields may contain comma as well (see
1923 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1924 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1925
1926 def feed_entry(name):
1927 return try_get(feed_data, lambda x: x[name][0], compat_str)
1928
1929 feed_id = feed_entry('id')
1930 if not feed_id:
1931 continue
1932 feed_title = feed_entry('title')
1933 title = video_title
1934 if feed_title:
1935 title += ' (%s)' % feed_title
1936 entries.append({
1937 '_type': 'url_transparent',
1938 'ie_key': 'Youtube',
1939 'url': smuggle_url(
1940 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1941 {'force_singlefeed': True}),
1942 'title': title,
1943 })
1944 feed_ids.append(feed_id)
1945 self.to_screen(
1946 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1947 % (', '.join(feed_ids), video_id))
1948 return self.playlist_result(entries, video_id, video_title, video_description)
1949 else:
1950 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1951
1952 if view_count is None:
1953 view_count = extract_view_count(video_info)
1954 if view_count is None and video_details:
1955 view_count = int_or_none(video_details.get('viewCount'))
1956 if view_count is None and microformat:
1957 view_count = int_or_none(microformat.get('viewCount'))
1958
1959 if is_live is None:
1960 is_live = bool_or_none(video_details.get('isLive'))
1961
1962 has_live_chat_replay = False
1963 if not is_live:
1964 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1965 try:
1966 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1967 has_live_chat_replay = True
1968 except (KeyError, IndexError, TypeError):
1969 pass
1970
1971 # Check for "rental" videos
1972 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1973 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1974
1975 def _extract_filesize(media_url):
1976 return int_or_none(self._search_regex(
1977 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1978
1979 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1980 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1981
1982 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1983 self.report_rtmp_download()
1984 formats = [{
1985 'format_id': '_rtmp',
1986 'protocol': 'rtmp',
1987 'url': video_info['conn'][0],
1988 'player_url': player_url,
1989 }]
1990 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1991 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1992 if 'rtmpe%3Dyes' in encoded_url_map:
1993 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1994 formats = []
1995 formats_spec = {}
1996 fmt_list = video_info.get('fmt_list', [''])[0]
1997 if fmt_list:
1998 for fmt in fmt_list.split(','):
1999 spec = fmt.split('/')
2000 if len(spec) > 1:
2001 width_height = spec[1].split('x')
2002 if len(width_height) == 2:
2003 formats_spec[spec[0]] = {
2004 'resolution': spec[1],
2005 'width': int_or_none(width_height[0]),
2006 'height': int_or_none(width_height[1]),
2007 }
2008 for fmt in streaming_formats:
2009 itag = str_or_none(fmt.get('itag'))
2010 if not itag:
2011 continue
2012 quality = fmt.get('quality')
2013 quality_label = fmt.get('qualityLabel') or quality
2014 formats_spec[itag] = {
2015 'asr': int_or_none(fmt.get('audioSampleRate')),
2016 'filesize': int_or_none(fmt.get('contentLength')),
2017 'format_note': quality_label,
2018 'fps': int_or_none(fmt.get('fps')),
2019 'height': int_or_none(fmt.get('height')),
2020 # bitrate for itag 43 is always 2147483647
2021 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2022 'width': int_or_none(fmt.get('width')),
2023 }
2024
2025 for fmt in streaming_formats:
2026 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2027 continue
2028 url = url_or_none(fmt.get('url'))
2029
2030 if not url:
2031 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2032 if not cipher:
2033 continue
2034 url_data = compat_parse_qs(cipher)
2035 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2036 if not url:
2037 continue
2038 else:
2039 cipher = None
2040 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2041
2042 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2043 # Unsupported FORMAT_STREAM_TYPE_OTF
2044 if stream_type == 3:
2045 continue
2046
2047 format_id = fmt.get('itag') or url_data['itag'][0]
2048 if not format_id:
2049 continue
2050 format_id = compat_str(format_id)
2051
2052 if cipher:
2053 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2054 ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
2055 jsplayer_url_json = self._search_regex(
2056 ASSETS_RE,
2057 embed_webpage if age_gate else video_webpage,
2058 'JS player URL (1)', default=None)
2059 if not jsplayer_url_json and not age_gate:
2060 # We need the embed website after all
2061 if embed_webpage is None:
2062 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2063 embed_webpage = self._download_webpage(
2064 embed_url, video_id, 'Downloading embed webpage')
2065 jsplayer_url_json = self._search_regex(
2066 ASSETS_RE, embed_webpage, 'JS player URL')
2067
2068 player_url = json.loads(jsplayer_url_json)
2069 if player_url is None:
2070 player_url_json = self._search_regex(
2071 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2072 video_webpage, 'age gate player URL')
2073 player_url = json.loads(player_url_json)
2074
2075 if 'sig' in url_data:
2076 url += '&signature=' + url_data['sig'][0]
2077 elif 's' in url_data:
2078 encrypted_sig = url_data['s'][0]
2079
2080 if self._downloader.params.get('verbose'):
2081 if player_url is None:
2082 player_desc = 'unknown'
2083 else:
2084 player_type, player_version = self._extract_player_info(player_url)
2085 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2086 parts_sizes = self._signature_cache_id(encrypted_sig)
2087 self.to_screen('{%s} signature length %s, %s' %
2088 (format_id, parts_sizes, player_desc))
2089
2090 signature = self._decrypt_signature(
2091 encrypted_sig, video_id, player_url, age_gate)
2092 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2093 url += '&%s=%s' % (sp, signature)
2094 if 'ratebypass' not in url:
2095 url += '&ratebypass=yes'
2096
2097 dct = {
2098 'format_id': format_id,
2099 'url': url,
2100 'player_url': player_url,
2101 }
2102 if format_id in self._formats:
2103 dct.update(self._formats[format_id])
2104 if format_id in formats_spec:
2105 dct.update(formats_spec[format_id])
2106
2107 # Some itags are not included in DASH manifest thus corresponding formats will
2108 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2109 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2110 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2111 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2112
2113 if width is None:
2114 width = int_or_none(fmt.get('width'))
2115 if height is None:
2116 height = int_or_none(fmt.get('height'))
2117
2118 filesize = int_or_none(url_data.get(
2119 'clen', [None])[0]) or _extract_filesize(url)
2120
2121 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2122 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2123
2124 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2125 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2126 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2127
2128 more_fields = {
2129 'filesize': filesize,
2130 'tbr': tbr,
2131 'width': width,
2132 'height': height,
2133 'fps': fps,
2134 'format_note': quality_label or quality,
2135 }
2136 for key, value in more_fields.items():
2137 if value:
2138 dct[key] = value
2139 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2140 if type_:
2141 type_split = type_.split(';')
2142 kind_ext = type_split[0].split('/')
2143 if len(kind_ext) == 2:
2144 kind, _ = kind_ext
2145 dct['ext'] = mimetype2ext(type_split[0])
2146 if kind in ('audio', 'video'):
2147 codecs = None
2148 for mobj in re.finditer(
2149 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2150 if mobj.group('key') == 'codecs':
2151 codecs = mobj.group('val')
2152 break
2153 if codecs:
2154 dct.update(parse_codecs(codecs))
2155 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2156 dct['downloader_options'] = {
2157 # Youtube throttles chunks >~10M
2158 'http_chunk_size': 10485760,
2159 }
2160 formats.append(dct)
2161 else:
2162 manifest_url = (
2163 url_or_none(try_get(
2164 player_response,
2165 lambda x: x['streamingData']['hlsManifestUrl'],
2166 compat_str))
2167 or url_or_none(try_get(
2168 video_info, lambda x: x['hlsvp'][0], compat_str)))
2169 if manifest_url:
2170 formats = []
2171 m3u8_formats = self._extract_m3u8_formats(
2172 manifest_url, video_id, 'mp4', fatal=False)
2173 for a_format in m3u8_formats:
2174 itag = self._search_regex(
2175 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2176 if itag:
2177 a_format['format_id'] = itag
2178 if itag in self._formats:
2179 dct = self._formats[itag].copy()
2180 dct.update(a_format)
2181 a_format = dct
2182 a_format['player_url'] = player_url
2183 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2184 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2185 if self._downloader.params.get('youtube_include_hls_manifest', True):
2186 formats.append(a_format)
2187 else:
2188 error_message = extract_unavailable_message()
2189 if not error_message:
2190 error_message = clean_html(try_get(
2191 player_response, lambda x: x['playabilityStatus']['reason'],
2192 compat_str))
2193 if not error_message:
2194 error_message = clean_html(
2195 try_get(video_info, lambda x: x['reason'][0], compat_str))
2196 if error_message:
2197 raise ExtractorError(error_message, expected=True)
2198 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2199
2200 # uploader
2201 video_uploader = try_get(
2202 video_info, lambda x: x['author'][0],
2203 compat_str) or str_or_none(video_details.get('author'))
2204 if video_uploader:
2205 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2206 else:
2207 self._downloader.report_warning('unable to extract uploader name')
2208
2209 # uploader_id
2210 video_uploader_id = None
2211 video_uploader_url = None
2212 mobj = re.search(
2213 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2214 video_webpage)
2215 if mobj is not None:
2216 video_uploader_id = mobj.group('uploader_id')
2217 video_uploader_url = mobj.group('uploader_url')
2218 else:
2219 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2220 if owner_profile_url:
2221 video_uploader_id = self._search_regex(
2222 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2223 default=None)
2224 video_uploader_url = owner_profile_url
2225
2226 channel_id = (
2227 str_or_none(video_details.get('channelId'))
2228 or self._html_search_meta(
2229 'channelId', video_webpage, 'channel id', default=None)
2230 or self._search_regex(
2231 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2232 video_webpage, 'channel id', default=None, group='id'))
2233 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2234
2235 thumbnails = []
2236 thumbnails_list = try_get(
2237 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2238 for t in thumbnails_list:
2239 if not isinstance(t, dict):
2240 continue
2241 thumbnail_url = url_or_none(t.get('url'))
2242 if not thumbnail_url:
2243 continue
2244 thumbnails.append({
2245 'url': thumbnail_url,
2246 'width': int_or_none(t.get('width')),
2247 'height': int_or_none(t.get('height')),
2248 })
2249
2250 if not thumbnails:
2251 video_thumbnail = None
2252 # We try first to get a high quality image:
2253 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2254 video_webpage, re.DOTALL)
2255 if m_thumb is not None:
2256 video_thumbnail = m_thumb.group(1)
2257 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2258 if thumbnail_url:
2259 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2260 if video_thumbnail:
2261 thumbnails.append({'url': video_thumbnail})
2262
2263 # upload date
2264 upload_date = self._html_search_meta(
2265 'datePublished', video_webpage, 'upload date', default=None)
2266 if not upload_date:
2267 upload_date = self._search_regex(
2268 [r'(?s)id="eow-date.*?>(.*?)</span>',
2269 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2270 video_webpage, 'upload date', default=None)
2271 if not upload_date:
2272 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2273 upload_date = unified_strdate(upload_date)
2274
2275 video_license = self._html_search_regex(
2276 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2277 video_webpage, 'license', default=None)
2278
2279 m_music = re.search(
2280 r'''(?x)
2281 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2282 <ul[^>]*>\s*
2283 <li>(?P<title>.+?)
2284 by (?P<creator>.+?)
2285 (?:
2286 \(.+?\)|
2287 <a[^>]*
2288 (?:
2289 \bhref=["\']/red[^>]*>| # drop possible
2290 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2291 )
2292 .*?
2293 )?</li
2294 ''',
2295 video_webpage)
2296 if m_music:
2297 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2298 video_creator = clean_html(m_music.group('creator'))
2299 else:
2300 video_alt_title = video_creator = None
2301
2302 def extract_meta(field):
2303 return self._html_search_regex(
2304 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2305 video_webpage, field, default=None)
2306
2307 track = extract_meta('Song')
2308 artist = extract_meta('Artist')
2309 album = extract_meta('Album')
2310
2311 # Youtube Music Auto-generated description
2312 release_date = release_year = None
2313 if video_description:
2314 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2315 if mobj:
2316 if not track:
2317 track = mobj.group('track').strip()
2318 if not artist:
2319 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2320 if not album:
2321 album = mobj.group('album'.strip())
2322 release_year = mobj.group('release_year')
2323 release_date = mobj.group('release_date')
2324 if release_date:
2325 release_date = release_date.replace('-', '')
2326 if not release_year:
2327 release_year = int(release_date[:4])
2328 if release_year:
2329 release_year = int(release_year)
2330
2331 m_episode = re.search(
2332 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2333 video_webpage)
2334 if m_episode:
2335 series = unescapeHTML(m_episode.group('series'))
2336 season_number = int(m_episode.group('season'))
2337 episode_number = int(m_episode.group('episode'))
2338 else:
2339 series = season_number = episode_number = None
2340
2341 m_cat_container = self._search_regex(
2342 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2343 video_webpage, 'categories', default=None)
2344 category = None
2345 if m_cat_container:
2346 category = self._html_search_regex(
2347 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2348 default=None)
2349 if not category:
2350 category = try_get(
2351 microformat, lambda x: x['category'], compat_str)
2352 video_categories = None if category is None else [category]
2353
2354 video_tags = [
2355 unescapeHTML(m.group('content'))
2356 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2357 if not video_tags:
2358 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2359
2360 def _extract_count(count_name):
2361 return str_to_int(self._search_regex(
2362 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
2363 % re.escape(count_name),
2364 video_webpage, count_name, default=None))
2365
2366 like_count = _extract_count('like')
2367 dislike_count = _extract_count('dislike')
2368
2369 if view_count is None:
2370 view_count = str_to_int(self._search_regex(
2371 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2372 'view count', default=None))
2373
2374 average_rating = (
2375 float_or_none(video_details.get('averageRating'))
2376 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2377
2378 # subtitles
2379 video_subtitles = self.extract_subtitles(
2380 video_id, video_webpage, has_live_chat_replay)
2381 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2382
2383 video_duration = try_get(
2384 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2385 if not video_duration:
2386 video_duration = int_or_none(video_details.get('lengthSeconds'))
2387 if not video_duration:
2388 video_duration = parse_duration(self._html_search_meta(
2389 'duration', video_webpage, 'video duration'))
2390
2391 # Get Subscriber Count of channel
2392 subscriber_count = parse_count(self._search_regex(
2393 r'"text":"([\d\.]+\w?) subscribers"',
2394 video_webpage,
2395 'subscriber count',
2396 default=None
2397 ))
2398
2399 # annotations
2400 video_annotations = None
2401 if self._downloader.params.get('writeannotations', False):
2402 xsrf_token = self._search_regex(
2403 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2404 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2405 invideo_url = try_get(
2406 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2407 if xsrf_token and invideo_url:
2408 xsrf_field_name = self._search_regex(
2409 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2410 video_webpage, 'xsrf field name',
2411 group='xsrf_field_name', default='session_token')
2412 video_annotations = self._download_webpage(
2413 self._proto_relative_url(invideo_url),
2414 video_id, note='Downloading annotations',
2415 errnote='Unable to download video annotations', fatal=False,
2416 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2417
2418 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2419
2420 # Look for the DASH manifest
2421 if self._downloader.params.get('youtube_include_dash_manifest', True):
2422 dash_mpd_fatal = True
2423 for mpd_url in dash_mpds:
2424 dash_formats = {}
2425 try:
2426 def decrypt_sig(mobj):
2427 s = mobj.group(1)
2428 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2429 return '/signature/%s' % dec_s
2430
2431 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2432
2433 for df in self._extract_mpd_formats(
2434 mpd_url, video_id, fatal=dash_mpd_fatal,
2435 formats_dict=self._formats):
2436 if not df.get('filesize'):
2437 df['filesize'] = _extract_filesize(df['url'])
2438 # Do not overwrite DASH format found in some previous DASH manifest
2439 if df['format_id'] not in dash_formats:
2440 dash_formats[df['format_id']] = df
2441 # Additional DASH manifests may end up in HTTP Error 403 therefore
2442 # allow them to fail without bug report message if we already have
2443 # some DASH manifest succeeded. This is temporary workaround to reduce
2444 # burst of bug reports until we figure out the reason and whether it
2445 # can be fixed at all.
2446 dash_mpd_fatal = False
2447 except (ExtractorError, KeyError) as e:
2448 self.report_warning(
2449 'Skipping DASH manifest: %r' % e, video_id)
2450 if dash_formats:
2451 # Remove the formats we found through non-DASH, they
2452 # contain less info and it can be wrong, because we use
2453 # fixed values (for example the resolution). See
2454 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2455 # example.
2456 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2457 formats.extend(dash_formats.values())
2458
2459 # Check for malformed aspect ratio
2460 stretched_m = re.search(
2461 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2462 video_webpage)
2463 if stretched_m:
2464 w = float(stretched_m.group('w'))
2465 h = float(stretched_m.group('h'))
2466 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2467 # We will only process correct ratios.
2468 if w > 0 and h > 0:
2469 ratio = w / h
2470 for f in formats:
2471 if f.get('vcodec') != 'none':
2472 f['stretched_ratio'] = ratio
2473
2474 if not formats:
2475 if 'reason' in video_info:
2476 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2477 regions_allowed = self._html_search_meta(
2478 'regionsAllowed', video_webpage, default=None)
2479 countries = regions_allowed.split(',') if regions_allowed else None
2480 self.raise_geo_restricted(
2481 msg=video_info['reason'][0], countries=countries)
2482 reason = video_info['reason'][0]
2483 if 'Invalid parameters' in reason:
2484 unavailable_message = extract_unavailable_message()
2485 if unavailable_message:
2486 reason = unavailable_message
2487 raise ExtractorError(
2488 'YouTube said: %s' % reason,
2489 expected=True, video_id=video_id)
2490 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2491 raise ExtractorError('This video is DRM protected.', expected=True)
2492
2493 self._sort_formats(formats)
2494
2495 self.mark_watched(video_id, video_info, player_response)
2496
2497 return {
2498 'id': video_id,
2499 'uploader': video_uploader,
2500 'uploader_id': video_uploader_id,
2501 'uploader_url': video_uploader_url,
2502 'channel_id': channel_id,
2503 'channel_url': channel_url,
2504 'upload_date': upload_date,
2505 'license': video_license,
2506 'creator': video_creator or artist,
2507 'title': video_title,
2508 'alt_title': video_alt_title or track,
2509 'thumbnails': thumbnails,
2510 'description': video_description,
2511 'categories': video_categories,
2512 'tags': video_tags,
2513 'subtitles': video_subtitles,
2514 'automatic_captions': automatic_captions,
2515 'duration': video_duration,
2516 'age_limit': 18 if age_gate else 0,
2517 'annotations': video_annotations,
2518 'chapters': chapters,
2519 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2520 'view_count': view_count,
2521 'like_count': like_count,
2522 'dislike_count': dislike_count,
2523 'average_rating': average_rating,
2524 'formats': formats,
2525 'is_live': is_live,
2526 'start_time': start_time,
2527 'end_time': end_time,
2528 'series': series,
2529 'season_number': season_number,
2530 'episode_number': episode_number,
2531 'track': track,
2532 'artist': artist,
2533 'album': album,
2534 'release_date': release_date,
2535 'release_year': release_year,
2536 'subscriber_count': subscriber_count,
2537 }
2538
2539
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extract YouTube playlists (including mixes and music-album playlists).

    Handles full playlist URLs, bare playlist IDs, embed/watch URLs that
    carry a ``list=`` parameter, and invidio.us / youtubekids.com mirrors.
    """
    IE_DESC = 'YouTube.com playlists'
    # Verbose (?x) regex: either a recognized host + path carrying a
    # p/a/list query parameter, or a bare playlist ID (group 2).
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                                \? (?:.*?[&;])*? (?:p|a|list)=
                            |    p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Template for matching playlist entries in the webpage; %s is the
    # video-id pattern, optional groups capture index and title.
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        # Playlists may be private; attempt login first (inherited helper).
        self._login()

    def extract_videos_from_page(self, page):
        """Return an iterable of (video_id, title) pairs scraped from *page*.

        Tries the modern data-video-id attribute markup first, then falls
        back to progressively more relaxed href-based regexes; duplicates
        are reconciled inside extract_videos_from_page_impl.
        """
        ids_in_page = []
        titles_in_page = []

        for item in re.findall(
                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
            attrs = extract_attributes(item)
            video_id = attrs['data-video-id']
            video_title = unescapeHTML(attrs.get('data-title'))
            if video_title:
                video_title = video_title.strip()
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)

        # Fallback with old _VIDEO_RE
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)

        # Relaxed fallbacks
        self.extract_videos_from_page_impl(
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)
        self.extract_videos_from_page_impl(
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix(self, playlist_id):
        """Extract a YouTube mix, which has no dedicated playlist page.

        The mixes are generated from a single video: the id of the playlist
        is just 'RD' + video_id.  Pages are fetched starting from the last
        seen video until no new ids appear.
        """
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # The title lives in a class-named span; try the most specific
        # class first.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title')
            or search_title('title long-title')
            or search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return (has_videos, playlist_result).

        has_videos is False when the URL serves no actual playlist entries,
        which lets _real_extract fall back to single-video extraction.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """If *url* also identifies a single video, honor --no-playlist.

        Returns (video_id, result): result is a url_result when only the
        video should be downloaded, otherwise None; both are None when the
        URL carries no video id.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
                return video_id, None
        return None, None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
2901
2902
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extract all videos of a YouTube channel given its /channel/<id> URL."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific playlists/live extractors when they
        # also match this URL.
        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                else super(YoutubeChannelIE, cls).suitable(url))

    def _build_template_url(self, url, channel_id):
        # Overridden by YoutubeUserIE, which also needs the original URL.
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to the mobile-app deep-link meta tags, which
                # embed the channel id in a vnd.youtube:// URL.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # The channel's uploads playlist shares the channel id with the
            # 'UC' prefix swapped for 'UU'.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for at least one entry; an empty page may instead carry
            # an alert message (e.g. terminated channel) worth surfacing.
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3002
3003
class YoutubeUserIE(YoutubeChannelIE):
    """Extract a user's videos from /user/, /c/ or bare-name URLs."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # _VALID_URL above is very permissive, so give every other
        # Youtube* extractor defined in this module the first shot;
        # only claim the URL if none of them matches.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # The path kind ('user' or 'c') must be preserved in the template;
        # bare-name URLs (no kind captured) default to 'user'.
        mobj = re.match(self._VALID_URL, url)
        path_kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, mobj.group('id'))
3061
3062
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's /live page to its current live video (if any)."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            # Page unavailable: hand the base channel URL back to the
            # generic resolution machinery.
            return self.url_result(base_url)
        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        looks_like_video = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        # No live video right now; fall back to the channel itself.
        return self.url_result(base_url)
3113
3114
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """List all playlists of a user/channel; entries come from the base class."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3147
3148
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for search extractors: entry regex for result pages."""
    # Matches watch links in search-result markup; title group is optional.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3151
3152
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search via the innertube JSON API ('ytsearchN:query')."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra 'params' value for the API request; set by subclasses to alter
    # result ordering (see YoutubeSearchDateIE).
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* url_transparent results for *query*, paging via
        the API's continuation tokens."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page nests results differently from continuation pages;
            # probe both layouts.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Non-video items (channels, shelves, ads) are skipped.
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                # Stop exactly at the requested number of results.
                if total == n:
                    return
            # The continuation token for the next page rides in the second
            # top-level item; absence means we reached the last page.
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3241
3242
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as YoutubeSearchIE but orders results by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # URL-encoded protobuf search filter; presumably selects
    # sort-by-upload-date on the API side -- TODO confirm.
    _SEARCH_PARAMS = 'CAI%3D'
3248
3249
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract results from a YouTube /results search page URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _find_videos_in_json(self, extracted):
        """Walk the parsed ytInitialData tree depth-first and collect every
        dict carrying a 'videoId' key (renderer objects)."""
        collected = []

        def _walk(node):
            # Strings are leaves; None means a missing branch.
            if node is None or isinstance(node, str):
                return
            if type(node) is list:
                for child in node:
                    _walk(child)
            if type(node) is dict:
                if "videoId" in node:
                    # A renderer; keep it and do not descend further.
                    collected.append(node)
                    return
                for value in node.values():
                    _walk(value)

        _walk(extracted)
        return collected

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Parse ytInitialData out of *page* and append (id, title) pairs
        into the two accumulator lists, deduplicating by video id."""
        initial_data = self._parse_json(
            self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)

        for renderer in self._find_videos_in_json(initial_data):
            video_id = try_get(renderer, lambda x: x['videoId'])
            video_title = (
                try_get(renderer, lambda x: x['title']['runs'][0]['text'])
                or try_get(renderer, lambda x: x['title']['simpleText']))

            if video_id is None or video_title is None:
                # we do not have a videoRenderer or title extraction broke
                continue

            video_title = video_title.strip()

            if video_id in ids_in_page:
                # Already seen: at most backfill a missing title.
                pos = ids_in_page.index(video_id)
                if video_title and not titles_in_page[pos]:
                    titles_in_page[pos] = video_title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return an iterable of (video_id, title) pairs found in *page*."""
        ids_in_page, titles_in_page = [], []
        self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
3323
3324
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a show by delegating to the playlists machinery on its
    /playlists page."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        show_id = self._match_id(url)
        # Every show exposes its seasons as playlists on this page.
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3342
3343
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # The ytInitialData JSON blob embedded in the feed page.
    _FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    # The ytcfg.set({...}) call carrying innertube tokens/headers.
    # The dot is escaped so e.g. 'ytcfgXset(...)' cannot match.
    _YTCFG_DATA = r"ytcfg\.set\(({.*?})\)"

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _find_videos_in_json(self, extracted):
        """Walk the parsed JSON tree and collect every dict containing a
        'videoId', plus the first 'nextContinuationData' dict found.

        Returns (videos, continuation); continuation is None when the
        feed has no further pages.
        """
        videos = []
        c = {}

        def _real_find(obj):
            # Strings are leaves; compat_str also covers py2 unicode,
            # which a plain `str` check would miss in this codebase.
            if obj is None or isinstance(obj, compat_str):
                return

            if isinstance(obj, list):
                for elem in obj:
                    _real_find(elem)

            if isinstance(obj, dict):
                if "videoId" in obj:
                    videos.append(obj)
                    return

                if "nextContinuationData" in obj:
                    c["continuation"] = obj["nextContinuationData"]
                    return

                # Only the values can contain nested renderers.
                for o in obj.values():
                    _real_find(o)

        _real_find(extracted)

        return videos, try_get(c, lambda x: x["continuation"])

    def _entries(self, page):
        """Yield url_result entries for every video in the feed,
        following browse_ajax continuations until exhausted."""
        # Ids already yielded; set membership replaces the previous
        # linear scan over all collected videos (O(n^2) over the feed).
        seen_ids = set()

        # ytcfg may legitimately be absent; without it we cannot build
        # the continuation request headers, so pagination stops.
        yt_conf = self._parse_json(self._search_regex(
            self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)

        search_response = self._parse_json(
            self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)

        for page_num in itertools.count(1):
            video_info, continuation = self._find_videos_in_json(search_response)

            new_info = []

            for v in video_info:
                v_id = try_get(v, lambda x: x['videoId'])
                if not v_id:
                    continue

                if v_id not in seen_ids:
                    seen_ids.add(v_id)
                    new_info.append(v)

            # No unseen videos on this page means the feed is looping;
            # stop rather than request the same continuation forever.
            if not new_info:
                break

            for video in new_info:
                yield self.url_result(
                    try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(),
                    video_title=try_get(video, lambda x: x['title']['runs'][0]['text']) or try_get(video, lambda x: x['title']['simpleText']))

            if not continuation or not yt_conf:
                break

            search_response = self._download_json(
                'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                query={
                    # Old and new parameter names for the same token.
                    "ctoken": try_get(continuation, lambda x: x["continuation"]),
                    "continuation": try_get(continuation, lambda x: x["continuation"]),
                    "itct": try_get(continuation, lambda x: x["clickTrackingParams"])
                },
                headers={
                    # Innertube client identification pulled from ytcfg;
                    # missing keys simply become None and are sent empty.
                    "X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
                    "X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
                    "X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
                    "X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
                    "X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
                    "X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
                    "X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
                })

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3450
3451
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the authenticated user's Watch Later list (id 'WL')."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL carrying list=WL may really mean "just this video".
        video = self._check_download_just_video(url, 'WL')[1]
        if video:
            return video
        return self._extract_playlist('WL')[1]
3471
3472
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the authenticated user's favourites list."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page is backed by a regular playlist: scrape its
        # id out of the markup and delegate to the playlist extractor.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_playlist = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_playlist, 'YoutubePlaylist')
3483
3484
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for youtube.com/feed/recommended (alias :ytrec)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'  # feed path segment; also yields IE_NAME 'youtube:recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3490
3491
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for youtube.com/feed/subscriptions (alias :ytsubs)."""
    # Description reworded to the ':ytsubs" for short' pattern used by the
    # sibling feed extractors; ':ytsubs' is the actual pseudo-URL alias
    # accepted by _VALID_URL, not a bare keyword.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'  # feed path segment; also yields IE_NAME 'youtube:subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3497
3498
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for youtube.com/feed/history (alias :ythistory)."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'  # feed path segment; also yields IE_NAME 'youtube:history'
    _PLAYLIST_TITLE = 'Youtube History'
3504
3505
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs whose video id was lost (typically an unquoted
    '&' eaten by the shell) and raises a helpful error instead of letting
    a generic extractor fail cryptically."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex: a watch URL with only non-id parameters (or an
    # attribution_link with only its 'a' parameter) and nothing after.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: matching this pattern means the URL is unusable.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3553
3554
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than the canonical
    11 characters and reports the truncation explicitly."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Matching at all means the id is 1-10 chars, i.e. truncated.
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (
            truncated_id, url)
        raise ExtractorError(message, expected=True)