1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28 )
29 from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 mimetype2ext,
40 orderedSet,
41 parse_codecs,
42 parse_count,
43 parse_duration,
44 remove_quotes,
45 remove_start,
46 smuggle_url,
47 str_or_none,
48 str_to_int,
49 try_get,
50 unescapeHTML,
51 unified_strdate,
52 unsmuggle_url,
53 uppercase_escape,
54 url_or_none,
55 urlencode_postdata,
56 )
57
58
59 class YoutubeBaseInfoExtractor(InfoExtractor):
60 """Provide base functions for Youtube extractors"""
61 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
62 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
63
64 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
65 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
66 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
67
68 _NETRC_MACHINE = 'youtube'
69 # If True, an error is raised when no login info is provided
70 _LOGIN_REQUIRED = False
71
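# Matches the usual playlist ID prefixes - e.g. PL (regular playlists),
# UU (channel uploads), LL (liked videos), FL (favourites), RD (mixes) and
# OLAK5uy_ (auto-generated album playlists); the exact meaning of each
# prefix is not documented by YouTube, so treat this list as best-effort.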
72 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
73
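# Identifies requests as coming from the desktop web client; the version
# string appears to be a date-based build number and may need bumping if
# YouTube starts rejecting old clients.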
74 _YOUTUBE_CLIENT_HEADERS = {
75 'x-youtube-client-name': '1',
76 'x-youtube-client-version': '1.20200609.04.02',
77 }
78
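# Force the English interface (hl=en) via the PREF cookie so that the
# text-based extraction below is independent of the account/IP locale;
# the other PREF flags mirror what the website itself sets.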
79 def _set_language(self):
80 self._set_cookie(
81 '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
82 # YouTube sets the expire time to about two months
83 expire_time=time.time() + 2 * 30 * 24 * 3600)
84
85 def _ids_to_results(self, ids):
86 return [
87 self.url_result(vid_id, 'Youtube', video_id=vid_id)
88 for vid_id in ids]
89
90 def _login(self):
91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
98 username, password = self._get_login_info()
99 # No authentication to be performed
100 if username is None:
101 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
102 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
103 return True
104
105 login_page = self._download_webpage(
106 self._LOGIN_URL, None,
107 note='Downloading login page',
108 errnote='unable to fetch login page', fatal=False)
109 if login_page is False:
110 return False
111
112 login_form = self._hidden_inputs(login_page)
113
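# Helper that POSTs one step of the Google sign-in flow; transform_source
# strips everything before the first '[' to drop the anti-XSSI prefix that
# Google prepends to these JSON responses.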
114 def req(url, f_req, note, errnote):
115 data = login_form.copy()
116 data.update({
117 'pstMsg': 1,
118 'checkConnection': 'youtube',
119 'checkedDomains': 'youtube',
120 'hl': 'en',
121 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
122 'f.req': json.dumps(f_req),
123 'flowName': 'GlifWebSignIn',
124 'flowEntry': 'ServiceLogin',
125 # TODO: reverse-engineer the actual botguard identifier generation algorithm
126 'bgRequest': '["identifier",""]',
127 })
128 return self._download_json(
129 url, None, note=note, errnote=errnote,
130 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
131 fatal=False,
132 data=urlencode_postdata(data), headers={
133 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
134 'Google-Accounts-XSRF': 1,
135 })
136
137 def warn(message):
138 self._downloader.report_warning(message)
139
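# The payloads below mirror the undocumented JSON arrays sent by Google's
# own sign-in page; most positional fields have unknown meaning and are
# simply replayed as observed in the browser.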
140 lookup_req = [
141 username,
142 None, [], None, 'US', None, None, 2, False, True,
143 [
144 None, None,
145 [2, 1, None, 1,
146 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
147 None, [], 4],
148 1, [None, None, []], None, None, None, True
149 ],
150 username,
151 ]
152
153 lookup_results = req(
154 self._LOOKUP_URL, lookup_req,
155 'Looking up account info', 'Unable to look up account info')
156
157 if lookup_results is False:
158 return False
159
160 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
161 if not user_hash:
162 warn('Unable to extract user hash')
163 return False
164
165 challenge_req = [
166 user_hash,
167 None, 1, None, [1, None, None, None, [password, None, True]],
168 [
169 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
170 1, [None, None, []], None, None, None, True
171 ]]
172
173 challenge_results = req(
174 self._CHALLENGE_URL, challenge_req,
175 'Logging in', 'Unable to log in')
176
177 if challenge_results is False:
178 return False
179
180 login_res = try_get(challenge_results, lambda x: x[0][5], list)
181 if login_res:
182 login_msg = try_get(login_res, lambda x: x[5], compat_str)
183 warn(
184 'Unable to log in: %s' % ('Invalid password'
185 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
186 return False
187
188 res = try_get(challenge_results, lambda x: x[0][-1], list)
189 if not res:
190 warn('Unable to extract result entry')
191 return False
192
193 login_challenge = try_get(res, lambda x: x[0][0], list)
194 if login_challenge:
195 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
196 if challenge_str == 'TWO_STEP_VERIFICATION':
197 # SEND_SUCCESS - TFA code has been successfully sent to phone
198 # QUOTA_EXCEEDED - reached the limit of TFA codes
199 status = try_get(login_challenge, lambda x: x[5], compat_str)
200 if status == 'QUOTA_EXCEEDED':
201 warn('Exceeded the limit of TFA codes, try later')
202 return False
203
204 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
205 if not tl:
206 warn('Unable to extract TL')
207 return False
208
209 tfa_code = self._get_tfa_info('2-step verification code')
210
211 if not tfa_code:
212 warn(
213 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
214 ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
215 return False
216
217 tfa_code = remove_start(tfa_code, 'G-')
218
219 tfa_req = [
220 user_hash, None, 2, None,
221 [
222 9, None, None, None, None, None, None, None,
223 [None, tfa_code, True, 2]
224 ]]
225
226 tfa_results = req(
227 self._TFA_URL.format(tl), tfa_req,
228 'Submitting TFA code', 'Unable to submit TFA code')
229
230 if tfa_results is False:
231 return False
232
233 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
234 if tfa_res:
235 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
236 warn(
237 'Unable to finish TFA: %s' % ('Invalid TFA code'
238 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
239 return False
240
241 check_cookie_url = try_get(
242 tfa_results, lambda x: x[0][-1][2], compat_str)
243 else:
244 CHALLENGES = {
245 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
246 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
247 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
248 }
249 challenge = CHALLENGES.get(
250 challenge_str,
251 '%s returned error %s.' % (self.IE_NAME, challenge_str))
252 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
253 return False
254 else:
255 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
256
257 if not check_cookie_url:
258 warn('Unable to extract CheckCookie URL')
259 return False
260
261 check_cookie_results = self._download_webpage(
262 check_cookie_url, None, 'Checking cookie', fatal=False)
263
264 if check_cookie_results is False:
265 return False
266
267 if 'https://myaccount.google.com/' not in check_cookie_results:
268 warn('Unable to log in')
269 return False
270
271 return True
272
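# All webpage requests go through here so that disable_polymer=true is
# always appended, keeping YouTube on the legacy (non-Polymer) layout that
# the regex-based extraction in this file expects.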
273 def _download_webpage_handle(self, *args, **kwargs):
274 query = kwargs.get('query', {}).copy()
275 query['disable_polymer'] = 'true'
276 kwargs['query'] = query
277 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
278 *args, **compat_kwargs(kwargs))
279
280 def _real_initialize(self):
281 if self._downloader is None:
282 return
283 self._set_language()
284 if not self._login():
285 return
286
287
288 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
289 # Extract entries from a page with a "Load more" button
290 def _entries(self, page, playlist_id):
291 more_widget_html = content_html = page
292 for page_num in itertools.count(1):
293 for entry in self._process_page(content_html):
294 yield entry
295
296 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
297 if not mobj:
298 break
299
300 count = 0
301 retries = 3
302 while count <= retries:
303 try:
304 # Downloading the page may result in an intermittent 5xx HTTP error
305 # that can usually be worked around with a retry
306 more = self._download_json(
307 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
308 'Downloading page #%s%s'
309 % (page_num, ' (retry #%d)' % count if count else ''),
310 transform_source=uppercase_escape,
311 headers=self._YOUTUBE_CLIENT_HEADERS)
312 break
313 except ExtractorError as e:
314 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
315 count += 1
316 if count <= retries:
317 continue
318 raise
319
320 content_html = more['content_html']
321 if not content_html.strip():
322 # Some webpages show a "Load more" button even though they don't
323 # actually have any more videos
324 break
325 more_widget_html = more['load_more_widget_html']
326
327
328 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
329 def _process_page(self, content):
330 for video_id, video_title in self.extract_videos_from_page(content):
331 yield self.url_result(video_id, 'Youtube', video_id, video_title)
332
333 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
334 for mobj in re.finditer(video_re, page):
335 # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
336 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
337 continue
338 video_id = mobj.group('id')
339 video_title = unescapeHTML(
340 mobj.group('title')) if 'title' in mobj.groupdict() else None
341 if video_title:
342 video_title = video_title.strip()
343 if video_title == '► Play all':
344 video_title = None
345 try:
346 idx = ids_in_page.index(video_id)
347 if video_title and not titles_in_page[idx]:
348 titles_in_page[idx] = video_title
349 except ValueError:
350 ids_in_page.append(video_id)
351 titles_in_page.append(video_title)
352
353 def extract_videos_from_page(self, page):
354 ids_in_page = []
355 titles_in_page = []
356 self.extract_videos_from_page_impl(
357 self._VIDEO_RE, page, ids_in_page, titles_in_page)
358 return zip(ids_in_page, titles_in_page)
359
360
361 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
362 def _process_page(self, content):
363 for playlist_id in orderedSet(re.findall(
364 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
365 content)):
366 yield self.url_result(
367 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
368
369 def _real_extract(self, url):
370 playlist_id = self._match_id(url)
371 webpage = self._download_webpage(url, playlist_id)
372 title = self._og_search_title(webpage, fatal=False)
373 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
374
375
376 class YoutubeIE(YoutubeBaseInfoExtractor):
377 IE_DESC = 'YouTube.com'
378 _VALID_URL = r"""(?x)^
379 (
380 (?:https?://|//) # http(s):// or protocol-independent URL
381 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
382 (?:www\.)?deturl\.com/www\.youtube\.com/|
383 (?:www\.)?pwnyoutube\.com/|
384 (?:www\.)?hooktube\.com/|
385 (?:www\.)?yourepeat\.com/|
386 tube\.majestyc\.net/|
387 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
388 (?:(?:www|dev)\.)?invidio\.us/|
389 (?:(?:www|no)\.)?invidiou\.sh/|
390 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
391 (?:www\.)?invidious\.kabi\.tk/|
392 (?:www\.)?invidious\.13ad\.de/|
393 (?:www\.)?invidious\.mastodon\.host/|
394 (?:www\.)?invidious\.nixnet\.xyz/|
395 (?:www\.)?invidious\.drycat\.fr/|
396 (?:www\.)?tube\.poal\.co/|
397 (?:www\.)?vid\.wxzm\.sx/|
398 (?:www\.)?yewtu\.be/|
399 (?:www\.)?yt\.elukerio\.org/|
400 (?:www\.)?yt\.lelux\.fi/|
401 (?:www\.)?invidious\.ggc-project\.de/|
402 (?:www\.)?yt\.maisputain\.ovh/|
403 (?:www\.)?invidious\.13ad\.de/|
404 (?:www\.)?invidious\.toot\.koeln/|
405 (?:www\.)?invidious\.fdn\.fr/|
406 (?:www\.)?watch\.nettohikari\.com/|
407 (?:www\.)?kgg2m7yk5aybusll\.onion/|
408 (?:www\.)?qklhadlycap4cnod\.onion/|
409 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
410 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
411 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
412 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
413 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
414 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
415 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
416 (?:.*?\#/)? # handle anchor (#/) redirect urls
417 (?: # the various things that can precede the ID:
418 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
419 |(?: # or the v= param in all its forms
420 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
421 (?:\?|\#!?) # the params delimiter ? or # or #!
422 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
423 v=
424 )
425 ))
426 |(?:
427 youtu\.be| # just youtu.be/xxxx
428 vid\.plus| # or vid.plus/xxxx
429 zwearz\.com/watch| # or zwearz.com/watch/xxxx
430 )/
431 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
432 )
433 )? # everything up to here is optional -> you can pass just the naked video ID
434 ([0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
435 (?!.*?\blist=
436 (?:
437 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
438 WL # WL are handled by the watch later IE
439 )
440 )
441 (?(1).+)? # if we found the ID, everything can follow
442 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
443 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
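# Extracts a player id/ext from the player URL for caching purposes; e.g. a
# (hypothetical) URL ending in /5a3b6271/player_ias.vflset/en_US/base.js
# would yield id '5a3b6271' and ext 'js', while the second pattern covers
# older 'vflXXXX'-style player names.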
444 _PLAYER_INFO_RE = (
445 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
446 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
447 )
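# Hard-coded itag metadata used to supplement whatever the stream/manifest
# data itself exposes (codecs, bitrates, 3D/HLS/DASH notes, preferences).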
448 _formats = {
449 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
450 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
451 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
452 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
453 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
454 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
455 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
457 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
458 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
459 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
460 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
461 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
462 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
463 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
464 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
465 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
466 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
467
468
469 # 3D videos
470 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
471 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
472 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
473 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
474 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
475 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
476 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
477
478 # Apple HTTP Live Streaming
479 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
480 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
481 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
482 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
483 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
484 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
485 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
486 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
487
488 # DASH mp4 video
489 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
494 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
495 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
497 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
498 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
499 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
500 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
501
502 # Dash mp4 audio
503 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
504 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
505 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
506 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
507 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
508 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
509 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
510
511 # Dash webm
512 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
517 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
518 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
519 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
527 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
528 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
529 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
530 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
531 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
532 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
534
535 # Dash webm audio
536 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
537 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
538
539 # Dash webm audio with opus inside
540 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
541 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
542 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
543
544 # RTMP (unnamed)
545 '_rtmp': {'protocol': 'rtmp'},
546
547 # av01 video-only formats are sometimes served with "unknown" codecs
548 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
549 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
550 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
552 }
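# Caption formats requested from the timedtext endpoints: json3 is JSON
# events, srv1-srv3 and ttml are XML-based variants, vtt is WebVTT.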
553 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
554
555 _GEO_BYPASS = False
556
557 IE_NAME = 'youtube'
558 _TESTS = [
559 {
560 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
561 'info_dict': {
562 'id': 'BaW_jenozKc',
563 'ext': 'mp4',
564 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
565 'uploader': 'Philipp Hagemeister',
566 'uploader_id': 'phihag',
567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
568 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
569 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
570 'upload_date': '20121002',
571 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
572 'categories': ['Science & Technology'],
573 'tags': ['youtube-dl'],
574 'duration': 10,
575 'view_count': int,
576 'like_count': int,
577 'dislike_count': int,
578 'start_time': 1,
579 'end_time': 9,
580 }
581 },
582 {
583 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
584 'note': 'Test generic use_cipher_signature video (#897)',
585 'info_dict': {
586 'id': 'UxxajLWwzqY',
587 'ext': 'mp4',
588 'upload_date': '20120506',
589 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
590 'alt_title': 'I Love It (feat. Charli XCX)',
591 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
592 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
593 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
594 'iconic ep', 'iconic', 'love', 'it'],
595 'duration': 180,
596 'uploader': 'Icona Pop',
597 'uploader_id': 'IconaPop',
598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
599 'creator': 'Icona Pop',
600 'track': 'I Love It (feat. Charli XCX)',
601 'artist': 'Icona Pop',
602 }
603 },
604 {
605 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
606 'note': 'Test VEVO video with age protection (#956)',
607 'info_dict': {
608 'id': '07FYdnEawAQ',
609 'ext': 'mp4',
610 'upload_date': '20130703',
611 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
612 'alt_title': 'Tunnel Vision',
613 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
614 'duration': 419,
615 'uploader': 'justintimberlakeVEVO',
616 'uploader_id': 'justintimberlakeVEVO',
617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
618 'creator': 'Justin Timberlake',
619 'track': 'Tunnel Vision',
620 'artist': 'Justin Timberlake',
621 'age_limit': 18,
622 }
623 },
624 {
625 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
626 'note': 'Embed-only video (#1746)',
627 'info_dict': {
628 'id': 'yZIXLfi8CZQ',
629 'ext': 'mp4',
630 'upload_date': '20120608',
631 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
632 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
633 'uploader': 'SET India',
634 'uploader_id': 'setindia',
635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
636 'age_limit': 18,
637 }
638 },
639 {
640 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
641 'note': 'Use the first video ID in the URL',
642 'info_dict': {
643 'id': 'BaW_jenozKc',
644 'ext': 'mp4',
645 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
646 'uploader': 'Philipp Hagemeister',
647 'uploader_id': 'phihag',
648 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
649 'upload_date': '20121002',
650 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
651 'categories': ['Science & Technology'],
652 'tags': ['youtube-dl'],
653 'duration': 10,
654 'view_count': int,
655 'like_count': int,
656 'dislike_count': int,
657 },
658 'params': {
659 'skip_download': True,
660 },
661 },
662 {
663 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
664 'note': '256k DASH audio (format 141) via DASH manifest',
665 'info_dict': {
666 'id': 'a9LDPn-MO4I',
667 'ext': 'm4a',
668 'upload_date': '20121002',
669 'uploader_id': '8KVIDEO',
670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
671 'description': '',
672 'uploader': '8KVIDEO',
673 'title': 'UHDTV TEST 8K VIDEO.mp4'
674 },
675 'params': {
676 'youtube_include_dash_manifest': True,
677 'format': '141',
678 },
679 'skip': 'format 141 not served anymore',
680 },
681 # DASH manifest with encrypted signature
682 {
683 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
684 'info_dict': {
685 'id': 'IB3lcPjvWLA',
686 'ext': 'm4a',
687 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
688 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
689 'duration': 244,
690 'uploader': 'AfrojackVEVO',
691 'uploader_id': 'AfrojackVEVO',
692 'upload_date': '20131011',
693 },
694 'params': {
695 'youtube_include_dash_manifest': True,
696 'format': '141/bestaudio[ext=m4a]',
697 },
698 },
699 # JS player signature function name containing $
700 {
701 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
702 'info_dict': {
703 'id': 'nfWlot6h_JM',
704 'ext': 'm4a',
705 'title': 'Taylor Swift - Shake It Off',
706 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
707 'duration': 242,
708 'uploader': 'TaylorSwiftVEVO',
709 'uploader_id': 'TaylorSwiftVEVO',
710 'upload_date': '20140818',
711 },
712 'params': {
713 'youtube_include_dash_manifest': True,
714 'format': '141/bestaudio[ext=m4a]',
715 },
716 },
717 # Controversy video
718 {
719 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
720 'info_dict': {
721 'id': 'T4XJQO3qol8',
722 'ext': 'mp4',
723 'duration': 219,
724 'upload_date': '20100909',
725 'uploader': 'Amazing Atheist',
726 'uploader_id': 'TheAmazingAtheist',
727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
728 'title': 'Burning Everyone\'s Koran',
729 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
730 }
731 },
732 # Normal age-gate video (No vevo, embed allowed)
733 {
734 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
735 'info_dict': {
736 'id': 'HtVdAasjOgU',
737 'ext': 'mp4',
738 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
739 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
740 'duration': 142,
741 'uploader': 'The Witcher',
742 'uploader_id': 'WitcherGame',
743 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
744 'upload_date': '20140605',
745 'age_limit': 18,
746 },
747 },
748 # Age-gate video with encrypted signature
749 {
750 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
751 'info_dict': {
752 'id': '6kLq3WMV1nU',
753 'ext': 'mp4',
754 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
755 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
756 'duration': 246,
757 'uploader': 'LloydVEVO',
758 'uploader_id': 'LloydVEVO',
759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
760 'upload_date': '20110629',
761 'age_limit': 18,
762 },
763 },
764 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
765 # YouTube Red ad is not captured for creator
766 {
767 'url': '__2ABJjxzNo',
768 'info_dict': {
769 'id': '__2ABJjxzNo',
770 'ext': 'mp4',
771 'duration': 266,
772 'upload_date': '20100430',
773 'uploader_id': 'deadmau5',
774 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
775 'creator': 'Dada Life, deadmau5',
776 'description': 'md5:12c56784b8032162bb936a5f76d55360',
777 'uploader': 'deadmau5',
778 'title': 'Deadmau5 - Some Chords (HD)',
779 'alt_title': 'This Machine Kills Some Chords',
780 },
781 'expected_warnings': [
782 'DASH manifest missing',
783 ]
784 },
785 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
786 {
787 'url': 'lqQg6PlCWgI',
788 'info_dict': {
789 'id': 'lqQg6PlCWgI',
790 'ext': 'mp4',
791 'duration': 6085,
792 'upload_date': '20150827',
793 'uploader_id': 'olympic',
794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
795 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
796 'uploader': 'Olympic',
797 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
798 },
799 'params': {
800 'skip_download': 'requires avconv',
801 }
802 },
803 # Non-square pixels
804 {
805 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
806 'info_dict': {
807 'id': '_b-2C3KPAM0',
808 'ext': 'mp4',
809 'stretched_ratio': 16 / 9.,
810 'duration': 85,
811 'upload_date': '20110310',
812 'uploader_id': 'AllenMeow',
813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
814 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
815 'uploader': '孫ᄋᄅ',
816 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
817 },
818 },
819 # url_encoded_fmt_stream_map is empty string
820 {
821 'url': 'qEJwOuvDf7I',
822 'info_dict': {
823 'id': 'qEJwOuvDf7I',
824 'ext': 'webm',
825 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
826 'description': '',
827 'upload_date': '20150404',
828 'uploader_id': 'spbelect',
829 'uploader': 'Наблюдатели Петербурга',
830 },
831 'params': {
832 'skip_download': 'requires avconv',
833 },
834 'skip': 'This live event has ended.',
835 },
836 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
837 {
838 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
839 'info_dict': {
840 'id': 'FIl7x6_3R5Y',
841 'ext': 'webm',
842 'title': 'md5:7b81415841e02ecd4313668cde88737a',
843 'description': 'md5:116377fd2963b81ec4ce64b542173306',
844 'duration': 220,
845 'upload_date': '20150625',
846 'uploader_id': 'dorappi2000',
847 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
848 'uploader': 'dorappi2000',
849 'formats': 'mincount:31',
850 },
851 'skip': 'not actual anymore',
852 },
853 # DASH manifest with segment_list
854 {
855 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
856 'md5': '8ce563a1d667b599d21064e982ab9e31',
857 'info_dict': {
858 'id': 'CsmdDsKjzN8',
859 'ext': 'mp4',
860 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
861 'uploader': 'Airtek',
862 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
863 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
864 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
865 },
866 'params': {
867 'youtube_include_dash_manifest': True,
868 'format': '135', # bestvideo
869 },
870 'skip': 'This live event has ended.',
871 },
872 {
873 # Multifeed videos (multiple cameras), URL is for Main Camera
874 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
875 'info_dict': {
876 'id': 'jqWvoWXjCVs',
877 'title': 'teamPGP: Rocket League Noob Stream',
878 'description': 'md5:dc7872fb300e143831327f1bae3af010',
879 },
880 'playlist': [{
881 'info_dict': {
882 'id': 'jqWvoWXjCVs',
883 'ext': 'mp4',
884 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
885 'description': 'md5:dc7872fb300e143831327f1bae3af010',
886 'duration': 7335,
887 'upload_date': '20150721',
888 'uploader': 'Beer Games Beer',
889 'uploader_id': 'beergamesbeer',
890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
891 'license': 'Standard YouTube License',
892 },
893 }, {
894 'info_dict': {
895 'id': '6h8e8xoXJzg',
896 'ext': 'mp4',
897 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
898 'description': 'md5:dc7872fb300e143831327f1bae3af010',
899 'duration': 7337,
900 'upload_date': '20150721',
901 'uploader': 'Beer Games Beer',
902 'uploader_id': 'beergamesbeer',
903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
904 'license': 'Standard YouTube License',
905 },
906 }, {
907 'info_dict': {
908 'id': 'PUOgX5z9xZw',
909 'ext': 'mp4',
910 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
911 'description': 'md5:dc7872fb300e143831327f1bae3af010',
912 'duration': 7337,
913 'upload_date': '20150721',
914 'uploader': 'Beer Games Beer',
915 'uploader_id': 'beergamesbeer',
916 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
917 'license': 'Standard YouTube License',
918 },
919 }, {
920 'info_dict': {
921 'id': 'teuwxikvS5k',
922 'ext': 'mp4',
923 'title': 'teamPGP: Rocket League Noob Stream (zim)',
924 'description': 'md5:dc7872fb300e143831327f1bae3af010',
925 'duration': 7334,
926 'upload_date': '20150721',
927 'uploader': 'Beer Games Beer',
928 'uploader_id': 'beergamesbeer',
929 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
930 'license': 'Standard YouTube License',
931 },
932 }],
933 'params': {
934 'skip_download': True,
935 },
936 'skip': 'This video is not available.',
937 },
938 {
939 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
940 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
941 'info_dict': {
942 'id': 'gVfLd0zydlo',
943 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
944 },
945 'playlist_count': 2,
946 'skip': 'Not multifeed anymore',
947 },
948 {
949 'url': 'https://vid.plus/FlRa-iH7PGw',
950 'only_matching': True,
951 },
952 {
953 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
954 'only_matching': True,
955 },
956 {
957 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
958 # Also tests cut-off URL expansion in video description (see
959 # https://github.com/ytdl-org/youtube-dl/issues/1892,
960 # https://github.com/ytdl-org/youtube-dl/issues/8164)
961 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
962 'info_dict': {
963 'id': 'lsguqyKfVQg',
964 'ext': 'mp4',
965 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
966 'alt_title': 'Dark Walk - Position Music',
967 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
968 'duration': 133,
969 'upload_date': '20151119',
970 'uploader_id': 'IronSoulElf',
971 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
972 'uploader': 'IronSoulElf',
973 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
974 'track': 'Dark Walk - Position Music',
975 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
976 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
977 },
978 'params': {
979 'skip_download': True,
980 },
981 },
982 {
983 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
984 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
985 'only_matching': True,
986 },
987 {
988 # Video with yt:stretch=17:0
989 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
990 'info_dict': {
991 'id': 'Q39EVAstoRM',
992 'ext': 'mp4',
993 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
994 'description': 'md5:ee18a25c350637c8faff806845bddee9',
995 'upload_date': '20151107',
996 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
997 'uploader': 'CH GAMER DROID',
998 },
999 'params': {
1000 'skip_download': True,
1001 },
1002 'skip': 'This video does not exist.',
1003 },
1004 {
1005 # Video licensed under Creative Commons
1006 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1007 'info_dict': {
1008 'id': 'M4gD1WSo5mA',
1009 'ext': 'mp4',
1010 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1011 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1012 'duration': 721,
1013 'upload_date': '20150127',
1014 'uploader_id': 'BerkmanCenter',
1015 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1016 'uploader': 'The Berkman Klein Center for Internet & Society',
1017 'license': 'Creative Commons Attribution license (reuse allowed)',
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 },
1023 {
1024 # Channel-like uploader_url
1025 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1026 'info_dict': {
1027 'id': 'eQcmzGIKrzg',
1028 'ext': 'mp4',
1029 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1030 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1031 'duration': 4060,
1032 'upload_date': '20151119',
1033 'uploader': 'Bernie Sanders',
1034 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1035 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1036 'license': 'Creative Commons Attribution license (reuse allowed)',
1037 },
1038 'params': {
1039 'skip_download': True,
1040 },
1041 },
1042 {
1043 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1044 'only_matching': True,
1045 },
1046 {
1047 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1048 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1049 'only_matching': True,
1050 },
1051 {
1052 # Rental video preview
1053 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1054 'info_dict': {
1055 'id': 'uGpuVWrhIzE',
1056 'ext': 'mp4',
1057 'title': 'Piku - Trailer',
1058 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1059 'upload_date': '20150811',
1060 'uploader': 'FlixMatrix',
1061 'uploader_id': 'FlixMatrixKaravan',
1062 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1063 'license': 'Standard YouTube License',
1064 },
1065 'params': {
1066 'skip_download': True,
1067 },
1068 'skip': 'This video is not available.',
1069 },
1070 {
1071 # YouTube Red video with episode data
1072 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1073 'info_dict': {
1074 'id': 'iqKdEhx-dD4',
1075 'ext': 'mp4',
1076 'title': 'Isolation - Mind Field (Ep 1)',
1077 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1078 'duration': 2085,
1079 'upload_date': '20170118',
1080 'uploader': 'Vsauce',
1081 'uploader_id': 'Vsauce',
1082 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1083 'series': 'Mind Field',
1084 'season_number': 1,
1085 'episode_number': 1,
1086 },
1087 'params': {
1088 'skip_download': True,
1089 },
1090 'expected_warnings': [
1091 'Skipping DASH manifest',
1092 ],
1093 },
1094 {
1095 # The following content has been identified by the YouTube community
1096 # as inappropriate or offensive to some audiences.
1097 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1098 'info_dict': {
1099 'id': '6SJNVb0GnPI',
1100 'ext': 'mp4',
1101 'title': 'Race Differences in Intelligence',
1102 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1103 'duration': 965,
1104 'upload_date': '20140124',
1105 'uploader': 'New Century Foundation',
1106 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1107 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1108 },
1109 'params': {
1110 'skip_download': True,
1111 },
1112 },
1113 {
1114 # itag 212
1115 'url': '1t24XAntNCY',
1116 'only_matching': True,
1117 },
1118 {
1119 # geo restricted to JP
1120 'url': 'sJL6WA-aGkQ',
1121 'only_matching': True,
1122 },
1123 {
1124 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1125 'only_matching': True,
1126 },
1127 {
1128 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1129 'only_matching': True,
1130 },
1131 {
1132 # DRM protected
1133 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1134 'only_matching': True,
1135 },
1136 {
1137 # Video with unsupported adaptive stream type formats
1138 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1139 'info_dict': {
1140 'id': 'Z4Vy8R84T1U',
1141 'ext': 'mp4',
1142 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1143 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1144 'duration': 433,
1145 'upload_date': '20130923',
1146 'uploader': 'Amelia Putri Harwita',
1147 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1148 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1149 'formats': 'maxcount:10',
1150 },
1151 'params': {
1152 'skip_download': True,
1153 'youtube_include_dash_manifest': False,
1154 },
1155 'skip': 'not actual anymore',
1156 },
1157 {
1158 # Youtube Music Auto-generated description
1159 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1160 'info_dict': {
1161 'id': 'MgNrAu2pzNs',
1162 'ext': 'mp4',
1163 'title': 'Voyeur Girl',
1164 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1165 'upload_date': '20190312',
1166 'uploader': 'Stephen - Topic',
1167 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1168 'artist': 'Stephen',
1169 'track': 'Voyeur Girl',
1170 'album': 'it\'s too much love to know my dear',
1171 'release_date': '20190313',
1172 'release_year': 2019,
1173 },
1174 'params': {
1175 'skip_download': True,
1176 },
1177 },
1178 {
1179 # Youtube Music Auto-generated description
1180 # Retrieve 'artist' field from 'Artist:' in video description
1181 # when it is present on youtube music video
1182 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1183 'info_dict': {
1184 'id': 'k0jLE7tTwjY',
1185 'ext': 'mp4',
1186 'title': 'Latch Feat. Sam Smith',
1187 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1188 'upload_date': '20150110',
1189 'uploader': 'Various Artists - Topic',
1190 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1191 'artist': 'Disclosure',
1192 'track': 'Latch Feat. Sam Smith',
1193 'album': 'Latch Featuring Sam Smith',
1194 'release_date': '20121008',
1195 'release_year': 2012,
1196 },
1197 'params': {
1198 'skip_download': True,
1199 },
1200 },
1201 {
1202 # Youtube Music Auto-generated description
1203 # handle multiple artists on youtube music video
1204 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1205 'info_dict': {
1206 'id': '74qn0eJSjpA',
1207 'ext': 'mp4',
1208 'title': 'Eastside',
1209 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1210 'upload_date': '20180710',
1211 'uploader': 'Benny Blanco - Topic',
1212 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1213 'artist': 'benny blanco, Halsey, Khalid',
1214 'track': 'Eastside',
1215 'album': 'Eastside',
1216 'release_date': '20180713',
1217 'release_year': 2018,
1218 },
1219 'params': {
1220 'skip_download': True,
1221 },
1222 },
1223 {
1224 # Youtube Music Auto-generated description
1225 # handle youtube music video with release_year and no release_date
1226 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1227 'info_dict': {
1228 'id': '-hcAI0g-f5M',
1229 'ext': 'mp4',
1230 'title': 'Put It On Me',
1231 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1232 'upload_date': '20180426',
1233 'uploader': 'Matt Maeson - Topic',
1234 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1235 'artist': 'Matt Maeson',
1236 'track': 'Put It On Me',
1237 'album': 'The Hearse',
1238 'release_date': None,
1239 'release_year': 2018,
1240 },
1241 'params': {
1242 'skip_download': True,
1243 },
1244 },
1245 {
1246 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1247 'only_matching': True,
1248 },
1249 {
1250 # invalid -> valid video id redirection
1251 'url': 'DJztXj2GPfl',
1252 'info_dict': {
1253 'id': 'DJztXj2GPfk',
1254 'ext': 'mp4',
1255 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1256 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1257 'upload_date': '20090125',
1258 'uploader': 'Prochorowka',
1259 'uploader_id': 'Prochorowka',
1260 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1261 'artist': 'Panjabi MC',
1262 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1263 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1264 },
1265 'params': {
1266 'skip_download': True,
1267 },
1268 },
1269 {
1270 # empty description results in an empty string
1271 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1272 'info_dict': {
1273 'id': 'x41yOUIvK2k',
1274 'ext': 'mp4',
1275 'title': 'IMG 3456',
1276 'description': '',
1277 'upload_date': '20170613',
1278 'uploader_id': 'ElevageOrVert',
1279 'uploader': 'ElevageOrVert',
1280 },
1281 'params': {
1282 'skip_download': True,
1283 },
1284 },
1285 ]
1286
1287 def __init__(self, *args, **kwargs):
1288 super(YoutubeIE, self).__init__(*args, **kwargs)
1289 self._player_cache = {}
1290
1291 def report_video_info_webpage_download(self, video_id):
1292 """Report attempt to download video info webpage."""
1293 self.to_screen('%s: Downloading video info webpage' % video_id)
1294
1295 def report_information_extraction(self, video_id):
1296 """Report attempt to extract video information."""
1297 self.to_screen('%s: Extracting video information' % video_id)
1298
1299 def report_unavailable_format(self, video_id, format):
1300 """Report extracted video URL."""
1301 self.to_screen('%s: Format %s not available' % (video_id, format))
1302
1303 def report_rtmp_download(self):
1304 """Indicate the download will use the RTMP protocol."""
1305 self.to_screen('RTMP download detected')
1306
1307 def _signature_cache_id(self, example_sig):
1308 """ Return a string representation of a signature """
1309 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1310
1311 @classmethod
1312 def _extract_player_info(cls, player_url):
1313 for player_re in cls._PLAYER_INFO_RE:
1314 id_m = re.search(player_re, player_url)
1315 if id_m:
1316 break
1317 else:
1318 raise ExtractorError('Cannot identify player %r' % player_url)
1319 return id_m.group('ext'), id_m.group('id')
1320
1321 def _extract_signature_function(self, video_id, player_url, example_sig):
1322 player_type, player_id = self._extract_player_info(player_url)
1323
1324 # Read from filesystem cache
1325 func_id = '%s_%s_%s' % (
1326 player_type, player_id, self._signature_cache_id(example_sig))
1327 assert os.path.basename(func_id) == func_id
1328
1329 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1330 if cache_spec is not None:
1331 return lambda s: ''.join(s[i] for i in cache_spec)
1332
1333 download_note = (
1334 'Downloading player %s' % player_url
1335 if self._downloader.params.get('verbose') else
1336 'Downloading %s player %s' % (player_type, player_id)
1337 )
1338 if player_type == 'js':
1339 code = self._download_webpage(
1340 player_url, video_id,
1341 note=download_note,
1342 errnote='Download of %s failed' % player_url)
1343 res = self._parse_sig_js(code)
1344 elif player_type == 'swf':
1345 urlh = self._request_webpage(
1346 player_url, video_id,
1347 note=download_note,
1348 errnote='Download of %s failed' % player_url)
1349 code = urlh.read()
1350 res = self._parse_sig_swf(code)
1351 else:
1352 assert False, 'Invalid player type %r' % player_type
1353
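# Run the deciphering function over a string of unique characters
# (chr(0), chr(1), ...) the same length as the example signature; the
# ordinals of the output characters then describe the transformation as a
# plain index mapping that can be cached and replayed cheaply.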
1354 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1355 cache_res = res(test_string)
1356 cache_spec = [ord(c) for c in cache_res]
1357
1358 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1359 return res
1360
1361 def _print_sig_code(self, func, example_sig):
1362 def gen_sig_code(idxs):
1363 def _genslice(start, end, step):
1364 starts = '' if start == 0 else str(start)
1365 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1366 steps = '' if step == 1 else (':%d' % step)
1367 return 's[%s%s%s]' % (starts, ends, steps)
1368
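# Walk the index list and collapse runs with a constant step of +/-1
# into slice expressions, falling back to single s[i] lookups otherwise.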
1369 step = None
1370 # Quell pyflakes warnings - start will be set when step is set
1371 start = '(Never used)'
1372 for i, prev in zip(idxs[1:], idxs[:-1]):
1373 if step is not None:
1374 if i - prev == step:
1375 continue
1376 yield _genslice(start, prev, step)
1377 step = None
1378 continue
1379 if i - prev in [-1, 1]:
1380 step = i - prev
1381 start = prev
1382 continue
1383 else:
1384 yield 's[%d]' % prev
1385 if step is None:
1386 yield 's[%d]' % i
1387 else:
1388 yield _genslice(start, i, step)
1389
1390 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1391 cache_res = func(test_string)
1392 cache_spec = [ord(c) for c in cache_res]
1393 expr_code = ' + '.join(gen_sig_code(cache_spec))
1394 signature_id_tuple = '(%s)' % (
1395 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1396 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1397 ' return %s\n') % (signature_id_tuple, expr_code)
1398 self.to_screen('Extracted signature function:\n' + code)
1399
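# Locate the name of the signature-deciphering function in the player JS
# (several historical call-site patterns are tried) and hand it to the
# bundled JS interpreter.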
1400 def _parse_sig_js(self, jscode):
1401 funcname = self._search_regex(
1402 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1403 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1404 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1405 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1406 # Obsolete patterns
1407 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1408 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1409 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1410 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1411 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1412 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1413 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1414 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1415 jscode, 'Initial JS player signature function name', group='sig')
1416
1417 jsi = JSInterpreter(jscode)
1418 initial_function = jsi.extract_function(funcname)
1419 return lambda s: initial_function([s])
1420
1421 def _parse_sig_swf(self, file_contents):
1422 swfi = SWFInterpreter(file_contents)
1423 TARGET_CLASSNAME = 'SignatureDecipher'
1424 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1425 initial_function = swfi.extract_function(searched_class, 'decipher')
1426 return lambda s: initial_function([s])
1427
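# Deciphered signature functions are cached per (player URL, signature
# layout) pair, so each player only has to be downloaded and parsed once.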
1428 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1429 """Turn the encrypted s field into a working signature"""
1430
1431 if player_url is None:
1432 raise ExtractorError('Cannot decrypt signature without player_url')
1433
1434 if player_url.startswith('//'):
1435 player_url = 'https:' + player_url
1436 elif not re.match(r'https?://', player_url):
1437 player_url = compat_urlparse.urljoin(
1438 'https://www.youtube.com', player_url)
1439 try:
1440 player_id = (player_url, self._signature_cache_id(s))
1441 if player_id not in self._player_cache:
1442 func = self._extract_signature_function(
1443 video_id, player_url, s
1444 )
1445 self._player_cache[player_id] = func
1446 func = self._player_cache[player_id]
1447 if self._downloader.params.get('youtube_print_sig_code'):
1448 self._print_sig_code(func, s)
1449 return func(s)
1450 except Exception as e:
1451 tb = traceback.format_exc()
1452 raise ExtractorError(
1453 'Signature extraction failed: ' + tb, cause=e)
1454
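# Subtitle listing still uses the legacy timedtext 'list' API; when a live
# chat replay is available, a pseudo 'live_chat' track is added so it can
# be fetched via the youtube_live_chat_replay protocol.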
1455 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1456 try:
1457 subs_doc = self._download_xml(
1458 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1459 video_id, note=False)
1460 except ExtractorError as err:
1461 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1462 return {}
1463
1464 sub_lang_list = {}
1465 for track in subs_doc.findall('track'):
1466 lang = track.attrib['lang_code']
1467 if lang in sub_lang_list:
1468 continue
1469 sub_formats = []
1470 for ext in self._SUBTITLE_FORMATS:
1471 params = compat_urllib_parse_urlencode({
1472 'lang': lang,
1473 'v': video_id,
1474 'fmt': ext,
1475 'name': track.attrib['name'].encode('utf-8'),
1476 })
1477 sub_formats.append({
1478 'url': 'https://www.youtube.com/api/timedtext?' + params,
1479 'ext': ext,
1480 })
1481 sub_lang_list[lang] = sub_formats
1482 if has_live_chat_replay:
1483 sub_lang_list['live_chat'] = [
1484 {
1485 'video_id': video_id,
1486 'ext': 'json',
1487 'protocol': 'youtube_live_chat_replay',
1488 },
1489 ]
1490 if not sub_lang_list:
1491 self._downloader.report_warning('video doesn\'t have subtitles')
1492 return {}
1493 return sub_lang_list
1494
1495 def _get_ytplayer_config(self, video_id, webpage):
1496 patterns = (
1497 # User data may contain arbitrary character sequences that can break
1498 # JSON extraction with a regex, e.g. when '};' appears in a value the second
1499 # regex won't capture the whole JSON. Work around this by trying the more
1500 # specific regex first; proper quoted-string handling, to be implemented
1501 # in the future, will eventually replace this workaround (see
1502 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1503 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1504 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1505 r';ytplayer\.config\s*=\s*({.+?});',
1506 )
1507 config = self._search_regex(
1508 patterns, webpage, 'ytplayer.config', default=None)
1509 if config:
1510 return self._parse_json(
1511 uppercase_escape(config), video_id, fatal=False)
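# Illustrative (hypothetical) snippet the patterns above are meant to match:
#   ;ytplayer.config = {"args": {"player_response": "{...}"}, "assets": {"js": "/yts/jsbin/player-.../base.js"}};ytplayer.load(...);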
1512
1513 def _get_yt_initial_data(self, video_id, webpage):
1514 config = self._search_regex(
1515 (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
1516 r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
1517 webpage, 'ytInitialData', default=None)
1518 if config:
1519 return self._parse_json(
1520 uppercase_escape(config), video_id, fatal=False)
1521
1522 def _get_automatic_captions(self, video_id, webpage):
1523 """We need the webpage for getting the captions url, pass it as an
1524 argument to speed up the process."""
1525 self.to_screen('%s: Looking for automatic captions' % video_id)
1526 player_config = self._get_ytplayer_config(video_id, webpage)
1527 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1528 if not player_config:
1529 self._downloader.report_warning(err_msg)
1530 return {}
1531 try:
1532 args = player_config['args']
1533 caption_url = args.get('ttsurl')
1534 if caption_url:
1535 timestamp = args['timestamp']
1536 # We get the available subtitles
1537 list_params = compat_urllib_parse_urlencode({
1538 'type': 'list',
1539 'tlangs': 1,
1540 'asrs': 1,
1541 })
1542 list_url = caption_url + '&' + list_params
1543 caption_list = self._download_xml(list_url, video_id)
1544 original_lang_node = caption_list.find('track')
1545 if original_lang_node is None:
1546 self._downloader.report_warning('Video doesn\'t have automatic captions')
1547 return {}
1548 original_lang = original_lang_node.attrib['lang_code']
1549 caption_kind = original_lang_node.attrib.get('kind', '')
1550
1551 sub_lang_list = {}
1552 for lang_node in caption_list.findall('target'):
1553 sub_lang = lang_node.attrib['lang_code']
1554 sub_formats = []
1555 for ext in self._SUBTITLE_FORMATS:
1556 params = compat_urllib_parse_urlencode({
1557 'lang': original_lang,
1558 'tlang': sub_lang,
1559 'fmt': ext,
1560 'ts': timestamp,
1561 'kind': caption_kind,
1562 })
1563 sub_formats.append({
1564 'url': caption_url + '&' + params,
1565 'ext': ext,
1566 })
1567 sub_lang_list[sub_lang] = sub_formats
1568 return sub_lang_list
1569
1570 def make_captions(sub_url, sub_langs):
1571 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1572 caption_qs = compat_parse_qs(parsed_sub_url.query)
1573 captions = {}
1574 for sub_lang in sub_langs:
1575 sub_formats = []
1576 for ext in self._SUBTITLE_FORMATS:
1577 caption_qs.update({
1578 'tlang': [sub_lang],
1579 'fmt': [ext],
1580 })
1581 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1582 query=compat_urllib_parse_urlencode(caption_qs, True)))
1583 sub_formats.append({
1584 'url': sub_url,
1585 'ext': ext,
1586 })
1587 captions[sub_lang] = sub_formats
1588 return captions
1589
1590 # New captions format as of 22.06.2017
1591 player_response = args.get('player_response')
1592 if player_response and isinstance(player_response, compat_str):
1593 player_response = self._parse_json(
1594 player_response, video_id, fatal=False)
1595 if player_response:
1596 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1597 caption_tracks = renderer['captionTracks']
1598 for caption_track in caption_tracks:
1599 if 'kind' not in caption_track:
1600 # not an automatic transcription
1601 continue
1602 base_url = caption_track['baseUrl']
1603 sub_lang_list = []
1604 for lang in renderer['translationLanguages']:
1605 lang_code = lang.get('languageCode')
1606 if lang_code:
1607 sub_lang_list.append(lang_code)
1608 return make_captions(base_url, sub_lang_list)
1609
1610 self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
1611 return {}
1612 # Some videos don't provide ttsurl but rather caption_tracks and
1613 # caption_translation_languages (e.g. 20LmZk1hakA)
1614 # No longer used as of 22.06.2017
1615 caption_tracks = args['caption_tracks']
1616 caption_translation_languages = args['caption_translation_languages']
1617 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1618 sub_lang_list = []
1619 for lang in caption_translation_languages.split(','):
1620 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1621 sub_lang = lang_qs.get('lc', [None])[0]
1622 if sub_lang:
1623 sub_lang_list.append(sub_lang)
1624 return make_captions(caption_url, sub_lang_list)
1625 # An extractor error can be raised by the download process if there are
1626 # no automatic captions but there are subtitles
1627 except (KeyError, IndexError, ExtractorError):
1628 self._downloader.report_warning(err_msg)
1629 return {}
1630
1631 def _mark_watched(self, video_id, video_info, player_response):
1632 playback_url = url_or_none(try_get(
1633 player_response,
1634 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1635 video_info, lambda x: x['videostats_playback_base_url'][0]))
1636 if not playback_url:
1637 return
1638 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1639 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1640
1641 # The cpn generation algorithm is reverse-engineered from base.js;
1642 # in fact it works even with a dummy cpn.
1643 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1644 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
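# e.g. (illustrative) cpn could come out as 'aB3dEfGhIjKlMnOp': 16 characters drawn from
# the 64-character alphabet above; per the note above, even a dummy value is accepted.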
1645
1646 qs.update({
1647 'ver': ['2'],
1648 'cpn': [cpn],
1649 })
1650 playback_url = compat_urlparse.urlunparse(
1651 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1652
1653 self._download_webpage(
1654 playback_url, video_id, 'Marking watched',
1655 'Unable to mark watched', fatal=False)
1656
1657 @staticmethod
1658 def _extract_urls(webpage):
1659 # Embedded YouTube player
1660 entries = [
1661 unescapeHTML(mobj.group('url'))
1662 for mobj in re.finditer(r'''(?x)
1663 (?:
1664 <iframe[^>]+?src=|
1665 data-video-url=|
1666 <embed[^>]+?src=|
1667 embedSWF\(?:\s*|
1668 <object[^>]+data=|
1669 new\s+SWFObject\(
1670 )
1671 (["\'])
1672 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1673 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1674 \1''', webpage)]
1675
1676 # lazyYT YouTube embed
1677 entries.extend(list(map(
1678 unescapeHTML,
1679 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1680
1681 # Wordpress "YouTube Video Importer" plugin
1682 matches = re.findall(r'''(?x)<div[^>]+
1683 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1684 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1685 entries.extend(m[-1] for m in matches)
1686
1687 return entries
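# Illustrative embed markup matched by the patterns above (hypothetical pages/ids):
#   <iframe src="https://www.youtube.com/embed/XXXXXXXXXXX"></iframe>
#   <div class="lazyYT" data-youtube-id="XXXXXXXXXXX"></div>
#   <div class="yvii_single_video_player" data-video_id="XXXXXXXXXXX"></div>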
1688
1689 @staticmethod
1690 def _extract_url(webpage):
1691 urls = YoutubeIE._extract_urls(webpage)
1692 return urls[0] if urls else None
1693
1694 @classmethod
1695 def extract_id(cls, url):
1696 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1697 if mobj is None:
1698 raise ExtractorError('Invalid URL: %s' % url)
1699 video_id = mobj.group(2)
1700 return video_id
1701
1702 def _extract_chapters_from_json(self, webpage, video_id, duration):
1703 if not webpage:
1704 return
1705 initial_data = self._parse_json(
1706 self._search_regex(
1707 r'window\["ytInitialData"\] = (.+);\n', webpage,
1708 'player args', default='{}'),
1709 video_id, fatal=False)
1710 if not initial_data or not isinstance(initial_data, dict):
1711 return
1712 chapters_list = try_get(
1713 initial_data,
1714 lambda x: x['playerOverlays']
1715 ['playerOverlayRenderer']
1716 ['decoratedPlayerBarRenderer']
1717 ['decoratedPlayerBarRenderer']
1718 ['playerBar']
1719 ['chapteredPlayerBarRenderer']
1720 ['chapters'],
1721 list)
1722 if not chapters_list:
1723 return
1724
1725 def chapter_time(chapter):
1726 return float_or_none(
1727 try_get(
1728 chapter,
1729 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1730 int),
1731 scale=1000)
1732 chapters = []
1733 for next_num, chapter in enumerate(chapters_list, start=1):
1734 start_time = chapter_time(chapter)
1735 if start_time is None:
1736 continue
1737 end_time = (chapter_time(chapters_list[next_num])
1738 if next_num < len(chapters_list) else duration)
1739 if end_time is None:
1740 continue
1741 title = try_get(
1742 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1743 compat_str)
1744 chapters.append({
1745 'start_time': start_time,
1746 'end_time': end_time,
1747 'title': title,
1748 })
1749 return chapters
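# Illustrative chapter entry built above (hypothetical values):
#   {'start_time': 0.0, 'end_time': 95.0, 'title': 'Intro'}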
1750
1751 @staticmethod
1752 def _extract_chapters_from_description(description, duration):
1753 if not description:
1754 return None
1755 chapter_lines = re.findall(
1756 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1757 description)
1758 if not chapter_lines:
1759 return None
1760 chapters = []
1761 for next_num, (chapter_line, time_point) in enumerate(
1762 chapter_lines, start=1):
1763 start_time = parse_duration(time_point)
1764 if start_time is None:
1765 continue
1766 if start_time > duration:
1767 break
1768 end_time = (duration if next_num == len(chapter_lines)
1769 else parse_duration(chapter_lines[next_num][1]))
1770 if end_time is None:
1771 continue
1772 if end_time > duration:
1773 end_time = duration
1774 if start_time > end_time:
1775 break
1776 chapter_title = re.sub(
1777 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1778 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1779 chapters.append({
1780 'start_time': start_time,
1781 'end_time': end_time,
1782 'title': chapter_title,
1783 })
1784 return chapters
1785
1786 def _extract_chapters(self, webpage, description, video_id, duration):
1787 return (self._extract_chapters_from_json(webpage, video_id, duration)
1788 or self._extract_chapters_from_description(description, duration))
1789
1790 def _real_extract(self, url):
1791 url, smuggled_data = unsmuggle_url(url, {})
1792
1793 proto = (
1794 'http' if self._downloader.params.get('prefer_insecure', False)
1795 else 'https')
1796
1797 start_time = None
1798 end_time = None
1799 parsed_url = compat_urllib_parse_urlparse(url)
1800 for component in [parsed_url.fragment, parsed_url.query]:
1801 query = compat_parse_qs(component)
1802 if start_time is None and 't' in query:
1803 start_time = parse_duration(query['t'][0])
1804 if start_time is None and 'start' in query:
1805 start_time = parse_duration(query['start'][0])
1806 if end_time is None and 'end' in query:
1807 end_time = parse_duration(query['end'][0])
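# e.g. (illustrative) for a URL ending in '#t=1m30s' or '?start=90' the loop above yields
# start_time == 90.0, and an additional '&end=120' parameter yields end_time == 120.0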
1808
1809 # Extract the original video URL from a redirecting URL (e.g. age verification) using the next_url parameter
1810 mobj = re.search(self._NEXT_URL_RE, url)
1811 if mobj:
1812 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1813 video_id = self.extract_id(url)
1814
1815 # Get video webpage
1816 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1817 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1818
1819 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1820 video_id = qs.get('v', [None])[0] or video_id
1821
1822 # Attempt to extract SWF player URL
1823 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1824 if mobj is not None:
1825 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1826 else:
1827 player_url = None
1828
1829 dash_mpds = []
1830
1831 def add_dash_mpd(video_info):
1832 dash_mpd = video_info.get('dashmpd')
1833 if dash_mpd and dash_mpd[0] not in dash_mpds:
1834 dash_mpds.append(dash_mpd[0])
1835
1836 def add_dash_mpd_pr(pl_response):
1837 dash_mpd = url_or_none(try_get(
1838 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1839 compat_str))
1840 if dash_mpd and dash_mpd not in dash_mpds:
1841 dash_mpds.append(dash_mpd)
1842
1843 is_live = None
1844 view_count = None
1845
1846 def extract_view_count(v_info):
1847 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1848
1849 def extract_player_response(player_response, video_id):
1850 pl_response = str_or_none(player_response)
1851 if not pl_response:
1852 return
1853 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1854 if isinstance(pl_response, dict):
1855 add_dash_mpd_pr(pl_response)
1856 return pl_response
1857
1858 player_response = {}
1859
1860 # Get video info
1861 video_info = {}
1862 embed_webpage = None
1863 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1864 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1865 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1866 age_gate = True
1867 # We simulate access to the video via www.youtube.com/v/{video_id};
1868 # this page can be viewed without logging in to Youtube
1869 url = proto + '://www.youtube.com/embed/%s' % video_id
1870 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1871 # Check whether the video is only playable on Youtube - if so, it requires auth (cookies)
1872 if re.search(r'player-unavailable">', embed_webpage) is not None:
1873 '''
1874 # TODO: apply this patch once support for Python 2.6 is dropped (set literals require Python 2.7+)
1875 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1876 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1877 '''
1878 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1879 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1880 age_gate = False
1881 # Try looking directly into the video webpage
1882 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1883 if ytplayer_config:
1884 args = ytplayer_config['args']
1885 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1886 # Convert to the same format returned by compat_parse_qs
1887 video_info = dict((k, [v]) for k, v in args.items())
1888 add_dash_mpd(video_info)
1889 # A rental video that is not rented may still have a preview available (e.g.
1890 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1891 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1892 if not video_info and args.get('ypc_vid'):
1893 return self.url_result(
1894 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1895 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1896 is_live = True
1897 if not player_response:
1898 player_response = extract_player_response(args.get('player_response'), video_id)
1899 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1900 add_dash_mpd_pr(player_response)
1901 else:
1902 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1903 else:
1904 data = compat_urllib_parse_urlencode({
1905 'video_id': video_id,
1906 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1907 'sts': self._search_regex(
1908 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1909 })
1910 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1911 try:
1912 video_info_webpage = self._download_webpage(
1913 video_info_url, video_id,
1914 note='Refetching age-gated info webpage',
1915 errnote='unable to download video info webpage')
1916 except ExtractorError:
1917 video_info_webpage = None
1918 if video_info_webpage:
1919 video_info = compat_parse_qs(video_info_webpage)
1920 pl_response = video_info.get('player_response', [None])[0]
1921 player_response = extract_player_response(pl_response, video_id)
1922 add_dash_mpd(video_info)
1923 view_count = extract_view_count(video_info)
1924 else:
1925 age_gate = False
1926 # Try looking directly into the video webpage
1927 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1928 if ytplayer_config:
1929 args = ytplayer_config['args']
1930 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1931 # Convert to the same format returned by compat_parse_qs
1932 video_info = dict((k, [v]) for k, v in args.items())
1933 add_dash_mpd(video_info)
1934 # A rental video that is not rented may still have a preview available (e.g.
1935 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1936 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1937 if not video_info and args.get('ypc_vid'):
1938 return self.url_result(
1939 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1940 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1941 is_live = True
1942 if not player_response:
1943 player_response = extract_player_response(args.get('player_response'), video_id)
1944 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1945 add_dash_mpd_pr(player_response)
1946
1947 def extract_unavailable_message():
1948 messages = []
1949 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1950 msg = self._html_search_regex(
1951 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1952 video_webpage, 'unavailable %s' % kind, default=None)
1953 if msg:
1954 messages.append(msg)
1955 if messages:
1956 return '\n'.join(messages)
1957
1958 if not video_info and not player_response:
1959 unavailable_message = extract_unavailable_message()
1960 if not unavailable_message:
1961 unavailable_message = 'Unable to extract video data'
1962 raise ExtractorError(
1963 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1964
1965 if not isinstance(video_info, dict):
1966 video_info = {}
1967
1968 video_details = try_get(
1969 player_response, lambda x: x['videoDetails'], dict) or {}
1970
1971 microformat = try_get(
1972 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1973
1974 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1975 if not video_title:
1976 self._downloader.report_warning('Unable to extract video title')
1977 video_title = '_'
1978
1979 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1980 if video_description:
1981
1982 def replace_url(m):
1983 redir_url = compat_urlparse.urljoin(url, m.group(1))
1984 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1985 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1986 qs = compat_parse_qs(parsed_redir_url.query)
1987 q = qs.get('q')
1988 if q and q[0]:
1989 return q[0]
1990 return redir_url
1991
1992 description_original = video_description = re.sub(r'''(?x)
1993 <a\s+
1994 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1995 (?:title|href)="([^"]+)"\s+
1996 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1997 class="[^"]*"[^>]*>
1998 [^<]+\.{3}\s*
1999 </a>
2000 ''', replace_url, video_description)
2001 video_description = clean_html(video_description)
2002 else:
2003 video_description = video_details.get('shortDescription')
2004 if video_description is None:
2005 video_description = self._html_search_meta('description', video_webpage)
2006
2007 if not smuggled_data.get('force_singlefeed', False):
2008 if not self._downloader.params.get('noplaylist'):
2009 multifeed_metadata_list = try_get(
2010 player_response,
2011 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2012 compat_str) or try_get(
2013 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
2014 if multifeed_metadata_list:
2015 entries = []
2016 feed_ids = []
2017 for feed in multifeed_metadata_list.split(','):
2018 # Unquote should take place before split on comma (,) since textual
2019 # fields may contain comma as well (see
2020 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2021 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
2022
2023 def feed_entry(name):
2024 return try_get(feed_data, lambda x: x[name][0], compat_str)
2025
2026 feed_id = feed_entry('id')
2027 if not feed_id:
2028 continue
2029 feed_title = feed_entry('title')
2030 title = video_title
2031 if feed_title:
2032 title += ' (%s)' % feed_title
2033 entries.append({
2034 '_type': 'url_transparent',
2035 'ie_key': 'Youtube',
2036 'url': smuggle_url(
2037 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
2038 {'force_singlefeed': True}),
2039 'title': title,
2040 })
2041 feed_ids.append(feed_id)
2042 self.to_screen(
2043 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2044 % (', '.join(feed_ids), video_id))
2045 return self.playlist_result(entries, video_id, video_title, video_description)
2046 else:
2047 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2048
2049 if view_count is None:
2050 view_count = extract_view_count(video_info)
2051 if view_count is None and video_details:
2052 view_count = int_or_none(video_details.get('viewCount'))
2053 if view_count is None and microformat:
2054 view_count = int_or_none(microformat.get('viewCount'))
2055
2056 if is_live is None:
2057 is_live = bool_or_none(video_details.get('isLive'))
2058
2059 has_live_chat_replay = False
2060 if not is_live:
2061 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2062 try:
2063 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2064 has_live_chat_replay = True
2065 except (KeyError, IndexError, TypeError):
2066 pass
2067
2068 # Check for "rental" videos
2069 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2070 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2071
2072 def _extract_filesize(media_url):
2073 return int_or_none(self._search_regex(
2074 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2075
2076 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2077 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2078
2079 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2080 self.report_rtmp_download()
2081 formats = [{
2082 'format_id': '_rtmp',
2083 'protocol': 'rtmp',
2084 'url': video_info['conn'][0],
2085 'player_url': player_url,
2086 }]
2087 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2088 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2089 if 'rtmpe%3Dyes' in encoded_url_map:
2090 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2091 formats = []
2092 formats_spec = {}
2093 fmt_list = video_info.get('fmt_list', [''])[0]
2094 if fmt_list:
2095 for fmt in fmt_list.split(','):
2096 spec = fmt.split('/')
2097 if len(spec) > 1:
2098 width_height = spec[1].split('x')
2099 if len(width_height) == 2:
2100 formats_spec[spec[0]] = {
2101 'resolution': spec[1],
2102 'width': int_or_none(width_height[0]),
2103 'height': int_or_none(width_height[1]),
2104 }
2105 for fmt in streaming_formats:
2106 itag = str_or_none(fmt.get('itag'))
2107 if not itag:
2108 continue
2109 quality = fmt.get('quality')
2110 quality_label = fmt.get('qualityLabel') or quality
2111 formats_spec[itag] = {
2112 'asr': int_or_none(fmt.get('audioSampleRate')),
2113 'filesize': int_or_none(fmt.get('contentLength')),
2114 'format_note': quality_label,
2115 'fps': int_or_none(fmt.get('fps')),
2116 'height': int_or_none(fmt.get('height')),
2117 # bitrate for itag 43 is always 2147483647
2118 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2119 'width': int_or_none(fmt.get('width')),
2120 }
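# Illustrative formats_spec entry keyed by itag (hypothetical values):
#   formats_spec['22'] == {'asr': 44100, 'filesize': 12345678, 'format_note': '720p',
#                          'fps': 30, 'height': 720, 'tbr': 1152.0, 'width': 1280}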
2121
2122 for fmt in streaming_formats:
2123 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2124 continue
2125 url = url_or_none(fmt.get('url'))
2126
2127 if not url:
2128 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2129 if not cipher:
2130 continue
2131 url_data = compat_parse_qs(cipher)
2132 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2133 if not url:
2134 continue
2135 else:
2136 cipher = None
2137 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
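# Illustrative (hypothetical) values handled above: a 'cipher'/'signatureCipher' string looks like
#   's=776AOq0QJ8w...&sp=sig&url=https%3A%2F%2Fr4---sn-example.googlevideo.com%2Fvideoplayback%3F...'
# and compat_parse_qs() yields {'s': ['776AOq0QJ8w...'], 'sp': ['sig'], 'url': ['https://...']};
# the non-cipher branch instead parses the query of the plain media URL.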
2138
2139 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2140 # Unsupported FORMAT_STREAM_TYPE_OTF
2141 if stream_type == 3:
2142 continue
2143
2144 format_id = fmt.get('itag') or url_data['itag'][0]
2145 if not format_id:
2146 continue
2147 format_id = compat_str(format_id)
2148
2149 if cipher:
2150 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2151 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2152 jsplayer_url_json = self._search_regex(
2153 ASSETS_RE,
2154 embed_webpage if age_gate else video_webpage,
2155 'JS player URL (1)', default=None)
2156 if not jsplayer_url_json and not age_gate:
2157 # We need the embed website after all
2158 if embed_webpage is None:
2159 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2160 embed_webpage = self._download_webpage(
2161 embed_url, video_id, 'Downloading embed webpage')
2162 jsplayer_url_json = self._search_regex(
2163 ASSETS_RE, embed_webpage, 'JS player URL')
2164
2165 player_url = json.loads(jsplayer_url_json)
2166 if player_url is None:
2167 player_url_json = self._search_regex(
2168 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2169 video_webpage, 'age gate player URL')
2170 player_url = json.loads(player_url_json)
2171
2172 if 'sig' in url_data:
2173 url += '&signature=' + url_data['sig'][0]
2174 elif 's' in url_data:
2175 encrypted_sig = url_data['s'][0]
2176
2177 if self._downloader.params.get('verbose'):
2178 if player_url is None:
2179 player_desc = 'unknown'
2180 else:
2181 player_type, player_version = self._extract_player_info(player_url)
2182 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2183 parts_sizes = self._signature_cache_id(encrypted_sig)
2184 self.to_screen('{%s} signature length %s, %s' %
2185 (format_id, parts_sizes, player_desc))
2186
2187 signature = self._decrypt_signature(
2188 encrypted_sig, video_id, player_url, age_gate)
2189 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2190 url += '&%s=%s' % (sp, signature)
2191 if 'ratebypass' not in url:
2192 url += '&ratebypass=yes'
2193
2194 dct = {
2195 'format_id': format_id,
2196 'url': url,
2197 'player_url': player_url,
2198 }
2199 if format_id in self._formats:
2200 dct.update(self._formats[format_id])
2201 if format_id in formats_spec:
2202 dct.update(formats_spec[format_id])
2203
2204 # Some itags are not included in the DASH manifest, so the corresponding formats
2205 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2206 # Try to extract the metadata from the url_encoded_fmt_stream_map entry instead.
2207 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2208 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2209
2210 if width is None:
2211 width = int_or_none(fmt.get('width'))
2212 if height is None:
2213 height = int_or_none(fmt.get('height'))
2214
2215 filesize = int_or_none(url_data.get(
2216 'clen', [None])[0]) or _extract_filesize(url)
2217
2218 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2219 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2220
2221 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2222 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2223 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2224
2225 more_fields = {
2226 'filesize': filesize,
2227 'tbr': tbr,
2228 'width': width,
2229 'height': height,
2230 'fps': fps,
2231 'format_note': quality_label or quality,
2232 }
2233 for key, value in more_fields.items():
2234 if value:
2235 dct[key] = value
2236 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2237 if type_:
2238 type_split = type_.split(';')
2239 kind_ext = type_split[0].split('/')
2240 if len(kind_ext) == 2:
2241 kind, _ = kind_ext
2242 dct['ext'] = mimetype2ext(type_split[0])
2243 if kind in ('audio', 'video'):
2244 codecs = None
2245 for mobj in re.finditer(
2246 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2247 if mobj.group('key') == 'codecs':
2248 codecs = mobj.group('val')
2249 break
2250 if codecs:
2251 dct.update(parse_codecs(codecs))
2252 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2253 dct['downloader_options'] = {
2254 # Youtube throttles chunks >~10M
2255 'http_chunk_size': 10485760,
2256 }
2257 formats.append(dct)
2258 else:
2259 manifest_url = (
2260 url_or_none(try_get(
2261 player_response,
2262 lambda x: x['streamingData']['hlsManifestUrl'],
2263 compat_str))
2264 or url_or_none(try_get(
2265 video_info, lambda x: x['hlsvp'][0], compat_str)))
2266 if manifest_url:
2267 formats = []
2268 m3u8_formats = self._extract_m3u8_formats(
2269 manifest_url, video_id, 'mp4', fatal=False)
2270 for a_format in m3u8_formats:
2271 itag = self._search_regex(
2272 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2273 if itag:
2274 a_format['format_id'] = itag
2275 if itag in self._formats:
2276 dct = self._formats[itag].copy()
2277 dct.update(a_format)
2278 a_format = dct
2279 a_format['player_url'] = player_url
2280 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2281 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2282 if self._downloader.params.get('youtube_include_hls_manifest', True):
2283 formats.append(a_format)
2284 else:
2285 error_message = extract_unavailable_message()
2286 if not error_message:
2287 error_message = clean_html(try_get(
2288 player_response, lambda x: x['playabilityStatus']['reason'],
2289 compat_str))
2290 if not error_message:
2291 error_message = clean_html(
2292 try_get(video_info, lambda x: x['reason'][0], compat_str))
2293 if error_message:
2294 raise ExtractorError(error_message, expected=True)
2295 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2296
2297 # uploader
2298 video_uploader = try_get(
2299 video_info, lambda x: x['author'][0],
2300 compat_str) or str_or_none(video_details.get('author'))
2301 if video_uploader:
2302 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2303 else:
2304 self._downloader.report_warning('unable to extract uploader name')
2305
2306 # uploader_id
2307 video_uploader_id = None
2308 video_uploader_url = None
2309 mobj = re.search(
2310 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2311 video_webpage)
2312 if mobj is not None:
2313 video_uploader_id = mobj.group('uploader_id')
2314 video_uploader_url = mobj.group('uploader_url')
2315 else:
2316 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2317 if owner_profile_url:
2318 video_uploader_id = self._search_regex(
2319 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2320 default=None)
2321 video_uploader_url = owner_profile_url
2322
2323 channel_id = (
2324 str_or_none(video_details.get('channelId'))
2325 or self._html_search_meta(
2326 'channelId', video_webpage, 'channel id', default=None)
2327 or self._search_regex(
2328 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2329 video_webpage, 'channel id', default=None, group='id'))
2330 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2331
2332 thumbnails = []
2333 thumbnails_list = try_get(
2334 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2335 for t in thumbnails_list:
2336 if not isinstance(t, dict):
2337 continue
2338 thumbnail_url = url_or_none(t.get('url'))
2339 if not thumbnail_url:
2340 continue
2341 thumbnails.append({
2342 'url': thumbnail_url,
2343 'width': int_or_none(t.get('width')),
2344 'height': int_or_none(t.get('height')),
2345 })
2346
2347 if not thumbnails:
2348 video_thumbnail = None
2349 # We first try to get a high-quality image:
2350 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2351 video_webpage, re.DOTALL)
2352 if m_thumb is not None:
2353 video_thumbnail = m_thumb.group(1)
2354 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2355 if thumbnail_url:
2356 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2357 if video_thumbnail:
2358 thumbnails.append({'url': video_thumbnail})
2359
2360 # upload date
2361 upload_date = self._html_search_meta(
2362 'datePublished', video_webpage, 'upload date', default=None)
2363 if not upload_date:
2364 upload_date = self._search_regex(
2365 [r'(?s)id="eow-date.*?>(.*?)</span>',
2366 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2367 video_webpage, 'upload date', default=None)
2368 if not upload_date:
2369 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2370 upload_date = unified_strdate(upload_date)
2371
2372 video_license = self._html_search_regex(
2373 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2374 video_webpage, 'license', default=None)
2375
2376 m_music = re.search(
2377 r'''(?x)
2378 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2379 <ul[^>]*>\s*
2380 <li>(?P<title>.+?)
2381 by (?P<creator>.+?)
2382 (?:
2383 \(.+?\)|
2384 <a[^>]*
2385 (?:
2386 \bhref=["\']/red[^>]*>| # drop possible
2387 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2388 )
2389 .*?
2390 )?</li
2391 ''',
2392 video_webpage)
2393 if m_music:
2394 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2395 video_creator = clean_html(m_music.group('creator'))
2396 else:
2397 video_alt_title = video_creator = None
2398
2399 def extract_meta(field):
2400 return self._html_search_regex(
2401 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2402 video_webpage, field, default=None)
2403
2404 track = extract_meta('Song')
2405 artist = extract_meta('Artist')
2406 album = extract_meta('Album')
2407
2408 # Youtube Music Auto-generated description
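# Illustrative (hypothetical) auto-generated description the regex below targets:
#   Provided to YouTube by SomeLabel
#   Song Title · Artist Name
#   Album Name
#   ℗ 2018 SomeLabel
#   Released on: 2018-05-04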
2409 release_date = release_year = None
2410 if video_description:
2411 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2412 if mobj:
2413 if not track:
2414 track = mobj.group('track').strip()
2415 if not artist:
2416 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2417 if not album:
2418 album = mobj.group('album').strip()
2419 release_year = mobj.group('release_year')
2420 release_date = mobj.group('release_date')
2421 if release_date:
2422 release_date = release_date.replace('-', '')
2423 if not release_year:
2424 release_year = int(release_date[:4])
2425 if release_year:
2426 release_year = int(release_year)
2427
2428 m_episode = re.search(
2429 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2430 video_webpage)
2431 if m_episode:
2432 series = unescapeHTML(m_episode.group('series'))
2433 season_number = int(m_episode.group('season'))
2434 episode_number = int(m_episode.group('episode'))
2435 else:
2436 series = season_number = episode_number = None
2437
2438 m_cat_container = self._search_regex(
2439 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2440 video_webpage, 'categories', default=None)
2441 category = None
2442 if m_cat_container:
2443 category = self._html_search_regex(
2444 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2445 default=None)
2446 if not category:
2447 category = try_get(
2448 microformat, lambda x: x['category'], compat_str)
2449 video_categories = None if category is None else [category]
2450
2451 video_tags = [
2452 unescapeHTML(m.group('content'))
2453 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2454 if not video_tags:
2455 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2456
2457 def _extract_count(count_name):
2458 return str_to_int(self._search_regex(
2459 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
2460 % re.escape(count_name),
2461 video_webpage, count_name, default=None))
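# Illustrative (hypothetical) markup _extract_count targets, e.g. for count_name 'like':
#   "accessibilityData":{"label":"12,345 likes"}  ->  str_to_int('12,345') == 12345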
2462
2463 like_count = _extract_count('like')
2464 dislike_count = _extract_count('dislike')
2465
2466 if view_count is None:
2467 view_count = str_to_int(self._search_regex(
2468 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2469 'view count', default=None))
2470
2471 average_rating = (
2472 float_or_none(video_details.get('averageRating'))
2473 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2474
2475 # subtitles
2476 video_subtitles = self.extract_subtitles(
2477 video_id, video_webpage, has_live_chat_replay)
2478 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2479
2480 video_duration = try_get(
2481 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2482 if not video_duration:
2483 video_duration = int_or_none(video_details.get('lengthSeconds'))
2484 if not video_duration:
2485 video_duration = parse_duration(self._html_search_meta(
2486 'duration', video_webpage, 'video duration'))
2487
2488 # Get the subscriber count of the channel
2489 subscriber_count = parse_count(self._search_regex(
2490 r'"text":"([\d\.]+\w?) subscribers"',
2491 video_webpage,
2492 'subscriber count',
2493 default=None
2494 ))
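# e.g. (illustrative) a snippet like '"text":"1.23M subscribers"' yields
# parse_count('1.23M') == 1230000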
2495
2496 # annotations
2497 video_annotations = None
2498 if self._downloader.params.get('writeannotations', False):
2499 xsrf_token = self._search_regex(
2500 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2501 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2502 invideo_url = try_get(
2503 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2504 if xsrf_token and invideo_url:
2505 xsrf_field_name = self._search_regex(
2506 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2507 video_webpage, 'xsrf field name',
2508 group='xsrf_field_name', default='session_token')
2509 video_annotations = self._download_webpage(
2510 self._proto_relative_url(invideo_url),
2511 video_id, note='Downloading annotations',
2512 errnote='Unable to download video annotations', fatal=False,
2513 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2514
2515 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2516
2517 # Look for the DASH manifest
2518 if self._downloader.params.get('youtube_include_dash_manifest', True):
2519 dash_mpd_fatal = True
2520 for mpd_url in dash_mpds:
2521 dash_formats = {}
2522 try:
2523 def decrypt_sig(mobj):
2524 s = mobj.group(1)
2525 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2526 return '/signature/%s' % dec_s
2527
2528 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2529
2530 for df in self._extract_mpd_formats(
2531 mpd_url, video_id, fatal=dash_mpd_fatal,
2532 formats_dict=self._formats):
2533 if not df.get('filesize'):
2534 df['filesize'] = _extract_filesize(df['url'])
2535 # Do not overwrite DASH format found in some previous DASH manifest
2536 if df['format_id'] not in dash_formats:
2537 dash_formats[df['format_id']] = df
2538 # Additional DASH manifests may result in HTTP Error 403, therefore
2539 # allow them to fail without a bug report message if at least one
2540 # DASH manifest has already succeeded. This is a temporary workaround to
2541 # reduce the burst of bug reports until we figure out the reason and
2542 # whether it can be fixed at all.
2543 dash_mpd_fatal = False
2544 except (ExtractorError, KeyError) as e:
2545 self.report_warning(
2546 'Skipping DASH manifest: %r' % e, video_id)
2547 if dash_formats:
2548 # Remove the formats found through non-DASH extraction; they
2549 # contain less info and can be wrong because we use
2550 # fixed values (for example the resolution). See
2551 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2552 # example.
2553 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2554 formats.extend(dash_formats.values())
2555
2556 # Check for malformed aspect ratio
2557 stretched_m = re.search(
2558 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2559 video_webpage)
2560 if stretched_m:
2561 w = float(stretched_m.group('w'))
2562 h = float(stretched_m.group('h'))
2563 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2564 # We will only process correct ratios.
2565 if w > 0 and h > 0:
2566 ratio = w / h
2567 for f in formats:
2568 if f.get('vcodec') != 'none':
2569 f['stretched_ratio'] = ratio
2570
2571 if not formats:
2572 if 'reason' in video_info:
2573 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2574 regions_allowed = self._html_search_meta(
2575 'regionsAllowed', video_webpage, default=None)
2576 countries = regions_allowed.split(',') if regions_allowed else None
2577 self.raise_geo_restricted(
2578 msg=video_info['reason'][0], countries=countries)
2579 reason = video_info['reason'][0]
2580 if 'Invalid parameters' in reason:
2581 unavailable_message = extract_unavailable_message()
2582 if unavailable_message:
2583 reason = unavailable_message
2584 raise ExtractorError(
2585 'YouTube said: %s' % reason,
2586 expected=True, video_id=video_id)
2587 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2588 raise ExtractorError('This video is DRM protected.', expected=True)
2589
2590 self._sort_formats(formats)
2591
2592 self.mark_watched(video_id, video_info, player_response)
2593
2594 return {
2595 'id': video_id,
2596 'uploader': video_uploader,
2597 'uploader_id': video_uploader_id,
2598 'uploader_url': video_uploader_url,
2599 'channel_id': channel_id,
2600 'channel_url': channel_url,
2601 'upload_date': upload_date,
2602 'license': video_license,
2603 'creator': video_creator or artist,
2604 'title': video_title,
2605 'alt_title': video_alt_title or track,
2606 'thumbnails': thumbnails,
2607 'description': video_description,
2608 'categories': video_categories,
2609 'tags': video_tags,
2610 'subtitles': video_subtitles,
2611 'automatic_captions': automatic_captions,
2612 'duration': video_duration,
2613 'age_limit': 18 if age_gate else 0,
2614 'annotations': video_annotations,
2615 'chapters': chapters,
2616 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2617 'view_count': view_count,
2618 'like_count': like_count,
2619 'dislike_count': dislike_count,
2620 'average_rating': average_rating,
2621 'formats': formats,
2622 'is_live': is_live,
2623 'start_time': start_time,
2624 'end_time': end_time,
2625 'series': series,
2626 'season_number': season_number,
2627 'episode_number': episode_number,
2628 'track': track,
2629 'artist': artist,
2630 'album': album,
2631 'release_date': release_date,
2632 'release_year': release_year,
2633 'subscriber_count': subscriber_count,
2634 }
2635
2636
2637 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2638 IE_DESC = 'YouTube.com playlists'
2639 _VALID_URL = r"""(?x)(?:
2640 (?:https?://)?
2641 (?:\w+\.)?
2642 (?:
2643 (?:
2644 youtube(?:kids)?\.com|
2645 invidio\.us
2646 )
2647 /
2648 (?:
2649 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2650 \? (?:.*?[&;])*? (?:p|a|list)=
2651 | p/
2652 )|
2653 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2654 )
2655 (
2656 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2657 # Top tracks, they can also include dots
2658 |(?:MC)[\w\.]*
2659 )
2660 .*
2661 |
2662 (%(playlist_id)s)
2663 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2664 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2665 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2666 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2667 IE_NAME = 'youtube:playlist'
2668 _TESTS = [{
2669 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2670 'info_dict': {
2671 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2672 'uploader': 'Sergey M.',
2673 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2674 'title': 'youtube-dl public playlist',
2675 },
2676 'playlist_count': 1,
2677 }, {
2678 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2679 'info_dict': {
2680 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2681 'uploader': 'Sergey M.',
2682 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2683 'title': 'youtube-dl empty playlist',
2684 },
2685 'playlist_count': 0,
2686 }, {
2687 'note': 'Playlist with deleted videos (#651). As a bonus, video #51 appears twice in this list.',
2688 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2689 'info_dict': {
2690 'title': '29C3: Not my department',
2691 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2692 'uploader': 'Christiaan008',
2693 'uploader_id': 'ChRiStIaAn008',
2694 },
2695 'playlist_count': 96,
2696 }, {
2697 'note': 'issue #673',
2698 'url': 'PLBB231211A4F62143',
2699 'info_dict': {
2700 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2701 'id': 'PLBB231211A4F62143',
2702 'uploader': 'Wickydoo',
2703 'uploader_id': 'Wickydoo',
2704 },
2705 'playlist_mincount': 26,
2706 }, {
2707 'note': 'Large playlist',
2708 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2709 'info_dict': {
2710 'title': 'Uploads from Cauchemar',
2711 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2712 'uploader': 'Cauchemar',
2713 'uploader_id': 'Cauchemar89',
2714 },
2715 'playlist_mincount': 799,
2716 }, {
2717 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2718 'info_dict': {
2719 'title': 'YDL_safe_search',
2720 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2721 },
2722 'playlist_count': 2,
2723 'skip': 'This playlist is private',
2724 }, {
2725 'note': 'embedded',
2726 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2727 'playlist_count': 4,
2728 'info_dict': {
2729 'title': 'JODA15',
2730 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2731 'uploader': 'milan',
2732 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2733 }
2734 }, {
2735 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2736 'playlist_mincount': 485,
2737 'info_dict': {
2738 'title': '2018 Chinese New Singles (11/6 updated)',
2739 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2740 'uploader': 'LBK',
2741 'uploader_id': 'sdragonfang',
2742 }
2743 }, {
2744 'note': 'Embedded SWF player',
2745 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2746 'playlist_count': 4,
2747 'info_dict': {
2748 'title': 'JODA7',
2749 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2750 },
2751 'skip': 'This playlist does not exist',
2752 }, {
2753 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2754 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2755 'info_dict': {
2756 'title': 'Uploads from Interstellar Movie',
2757 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2758 'uploader': 'Interstellar Movie',
2759 'uploader_id': 'InterstellarMovie1',
2760 },
2761 'playlist_mincount': 21,
2762 }, {
2763 # Playlist URL that does not actually serve a playlist
2764 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2765 'info_dict': {
2766 'id': 'FqZTN594JQw',
2767 'ext': 'webm',
2768 'title': "Smiley's People 01 detective, Adventure Series, Action",
2769 'uploader': 'STREEM',
2770 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2771 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2772 'upload_date': '20150526',
2773 'license': 'Standard YouTube License',
2774 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2775 'categories': ['People & Blogs'],
2776 'tags': list,
2777 'view_count': int,
2778 'like_count': int,
2779 'dislike_count': int,
2780 },
2781 'params': {
2782 'skip_download': True,
2783 },
2784 'skip': 'This video is not available.',
2785 'add_ie': [YoutubeIE.ie_key()],
2786 }, {
2787 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2788 'info_dict': {
2789 'id': 'yeWKywCrFtk',
2790 'ext': 'mp4',
2791 'title': 'Small Scale Baler and Braiding Rugs',
2792 'uploader': 'Backus-Page House Museum',
2793 'uploader_id': 'backuspagemuseum',
2794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2795 'upload_date': '20161008',
2796 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2797 'categories': ['Nonprofits & Activism'],
2798 'tags': list,
2799 'like_count': int,
2800 'dislike_count': int,
2801 },
2802 'params': {
2803 'noplaylist': True,
2804 'skip_download': True,
2805 },
2806 }, {
2807 # https://github.com/ytdl-org/youtube-dl/issues/21844
2808 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2809 'info_dict': {
2810 'title': 'Data Analysis with Dr Mike Pound',
2811 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2812 'uploader_id': 'Computerphile',
2813 'uploader': 'Computerphile',
2814 },
2815 'playlist_mincount': 11,
2816 }, {
2817 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2818 'only_matching': True,
2819 }, {
2820 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2821 'only_matching': True,
2822 }, {
2823 # music album playlist
2824 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2825 'only_matching': True,
2826 }, {
2827 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2828 'only_matching': True,
2829 }, {
2830 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2831 'only_matching': True,
2832 }]
2833
2834 def _real_initialize(self):
2835 self._login()
2836
2837 def extract_videos_from_page(self, page):
2838 ids_in_page = []
2839 titles_in_page = []
2840
2841 for item in re.findall(
2842 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2843 attrs = extract_attributes(item)
2844 video_id = attrs['data-video-id']
2845 video_title = unescapeHTML(attrs.get('data-title'))
2846 if video_title:
2847 video_title = video_title.strip()
2848 ids_in_page.append(video_id)
2849 titles_in_page.append(video_title)
2850
2851 # Fallback with old _VIDEO_RE
2852 self.extract_videos_from_page_impl(
2853 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2854
2855 # Relaxed fallbacks
2856 self.extract_videos_from_page_impl(
2857 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2858 ids_in_page, titles_in_page)
2859 self.extract_videos_from_page_impl(
2860 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2861 ids_in_page, titles_in_page)
2862
2863 return zip(ids_in_page, titles_in_page)
2864
2865 def _extract_mix(self, playlist_id):
2866 # Mixes are generated from a single video;
2867 # the id of the playlist is just 'RD' + video_id
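# e.g. (illustrative) a mix seeded by video 'XXXXXXXXXXX' uses playlist id 'RDXXXXXXXXXXX',
# so last_id below starts out as that seed video id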
2868 ids = []
2869 last_id = playlist_id[-11:]
2870 for n in itertools.count(1):
2871 url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2872 webpage = self._download_webpage(
2873 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2874 new_ids = orderedSet(re.findall(
2875 r'''(?xs)data-video-username=".*?".*?
2876 href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2877 webpage))
2878 # Fetch new pages until all the videos are repeated; it seems that
2879 # there are always 51 unique videos.
2880 new_ids = [_id for _id in new_ids if _id not in ids]
2881 if not new_ids:
2882 break
2883 ids.extend(new_ids)
2884 last_id = ids[-1]
2885
2886 url_results = self._ids_to_results(ids)
2887
2888 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2889 title_span = (
2890 search_title('playlist-title')
2891 or search_title('title long-title')
2892 or search_title('title'))
2893 title = clean_html(title_span)
2894
2895 return self.playlist_result(url_results, playlist_id, title)
2896
2897 def _extract_playlist(self, playlist_id):
2898 url = self._TEMPLATE_URL % playlist_id
2899 page = self._download_webpage(url, playlist_id)
2900
2901 # the yt-alert-message now has a tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2902 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2903 match = match.strip()
2904 # Check if the playlist exists or is private
2905 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2906 if mobj:
2907 reason = mobj.group('reason')
2908 message = 'This playlist %s' % reason
2909 if 'private' in reason:
2910 message += ', use --username or --netrc to access it'
2911 message += '.'
2912 raise ExtractorError(message, expected=True)
2913 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2914 raise ExtractorError(
2915 'Invalid parameters. Maybe URL is incorrect.',
2916 expected=True)
2917 elif re.match(r'[^<]*Choose your language[^<]*', match):
2918 continue
2919 else:
2920 self.report_warning('Youtube gives an alert message: ' + match)
2921
2922 playlist_title = self._html_search_regex(
2923 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2924 page, 'title', default=None)
2925
2926 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2927 uploader = self._html_search_regex(
2928 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2929 page, 'uploader', default=None)
2930 mobj = re.search(
2931 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2932 page)
2933 if mobj:
2934 uploader_id = mobj.group('uploader_id')
2935 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2936 else:
2937 uploader_id = uploader_url = None
2938
2939 has_videos = True
2940
2941 if not playlist_title:
2942 try:
2943 # Some playlist URLs don't actually serve a playlist (e.g.
2944 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2945 next(self._entries(page, playlist_id))
2946 except StopIteration:
2947 has_videos = False
2948
2949 playlist = self.playlist_result(
2950 self._entries(page, playlist_id), playlist_id, playlist_title)
2951 playlist.update({
2952 'uploader': uploader,
2953 'uploader_id': uploader_id,
2954 'uploader_url': uploader_url,
2955 })
2956
2957 return has_videos, playlist
2958
2959 def _check_download_just_video(self, url, playlist_id):
2960 # Check if it's a video-specific URL
2961 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2962 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2963 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2964 'video id', default=None)
2965 if video_id:
2966 if self._downloader.params.get('noplaylist'):
2967 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2968 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2969 else:
2970 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2971 return video_id, None
2972 return None, None
2973
2974 def _real_extract(self, url):
2975 # Extract playlist id
2976 mobj = re.match(self._VALID_URL, url)
2977 if mobj is None:
2978 raise ExtractorError('Invalid URL: %s' % url)
2979 playlist_id = mobj.group(1) or mobj.group(2)
2980
2981 video_id, video = self._check_download_just_video(url, playlist_id)
2982 if video:
2983 return video
2984
2985 if playlist_id.startswith(('RD', 'UL', 'PU')):
2986 # Mixes require a custom extraction process
2987 return self._extract_mix(playlist_id)
2988
2989 has_videos, playlist = self._extract_playlist(playlist_id)
2990 if has_videos or not video_id:
2991 return playlist
2992
2993 # Some playlist URLs don't actually serve a playlist (see
2994 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2995 # Fallback to plain video extraction if there is a video id
2996 # along with playlist id.
2997 return self.url_result(video_id, 'Youtube', video_id=video_id)
2998
2999
3000 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
3001 IE_DESC = 'YouTube.com channels'
3002 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
3003 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
3004 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
3005 IE_NAME = 'youtube:channel'
3006 _TESTS = [{
3007 'note': 'paginated channel',
3008 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
3009 'playlist_mincount': 91,
3010 'info_dict': {
3011 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
3012 'title': 'Uploads from lex will',
3013 'uploader': 'lex will',
3014 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3015 }
3016 }, {
3017 'note': 'Age restricted channel',
3018 # from https://www.youtube.com/user/DeusExOfficial
3019 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
3020 'playlist_mincount': 64,
3021 'info_dict': {
3022 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
3023 'title': 'Uploads from Deus Ex',
3024 'uploader': 'Deus Ex',
3025 'uploader_id': 'DeusExOfficial',
3026 },
3027 }, {
3028 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
3029 'only_matching': True,
3030 }, {
3031 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
3032 'only_matching': True,
3033 }]
3034
3035 @classmethod
3036 def suitable(cls, url):
3037 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
3038 else super(YoutubeChannelIE, cls).suitable(url))
3039
3040 def _build_template_url(self, url, channel_id):
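# Overridden by YoutubeUserIE to choose between /user/ and /c/ style URLs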
3041 return self._TEMPLATE_URL % channel_id
3042
3043 def _real_extract(self, url):
3044 channel_id = self._match_id(url)
3045
3046 url = self._build_template_url(url, channel_id)
3047
3048 # Page-by-page channel listing is restricted to 35 pages of 30 items, i.e. 1050 videos in total (see #5778)
3049 # Workaround: extract as a playlist if we manage to obtain the channel playlist URL,
3050 # otherwise fall back on page-by-page channel extraction
3051 channel_page = self._download_webpage(
3052 url + '?view=57', channel_id,
3053 'Downloading channel page', fatal=False)
3054 if channel_page is False:
3055 channel_playlist_id = False
3056 else:
3057 channel_playlist_id = self._html_search_meta(
3058 'channelId', channel_page, 'channel id', default=None)
3059 if not channel_playlist_id:
3060 channel_url = self._html_search_meta(
3061 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
3062 channel_page, 'channel url', default=None)
3063 if channel_url:
3064 channel_playlist_id = self._search_regex(
3065 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
3066 channel_url, 'channel id', default=None)
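# A channel's uploads playlist id is the channel id with the leading 'UC' replaced by 'UU'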
3067 if channel_playlist_id and channel_playlist_id.startswith('UC'):
3068 playlist_id = 'UU' + channel_playlist_id[2:]
3069 return self.url_result(
3070 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
3071
3072 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
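# Auto-generated channels are detected from CSS class names in the page markup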
3073 autogenerated = re.search(r'''(?x)
3074 class="[^"]*?(?:
3075 channel-header-autogenerated-label|
3076 yt-channel-title-autogenerated
3077 )[^"]*"''', channel_page) is not None
3078
3079 if autogenerated:
3080 # All the videos are contained in a single page;
3081 # the AJAX pages can't be used since they are empty
3082 entries = [
3083 self.url_result(
3084 video_id, 'Youtube', video_id=video_id,
3085 video_title=video_title)
3086 for video_id, video_title in self.extract_videos_from_page(channel_page)]
3087 return self.playlist_result(entries, channel_id)
3088
3089 try:
3090 next(self._entries(channel_page, channel_id))
3091 except StopIteration:
3092 alert_message = self._html_search_regex(
3093 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
3094 channel_page, 'alert', default=None, group='alert')
3095 if alert_message:
3096 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
3097
3098 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3099
3100
3101 class YoutubeUserIE(YoutubeChannelIE):
3102 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
3103 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
3104 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
3105 IE_NAME = 'youtube:user'
3106
3107 _TESTS = [{
3108 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
3109 'playlist_mincount': 320,
3110 'info_dict': {
3111 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
3112 'title': 'Uploads from The Linux Foundation',
3113 'uploader': 'The Linux Foundation',
3114 'uploader_id': 'TheLinuxFoundation',
3115 }
3116 }, {
3117 # Only available via https://www.youtube.com/c/12minuteathlete/videos
3118 # but not https://www.youtube.com/user/12minuteathlete/videos
3119 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
3120 'playlist_mincount': 249,
3121 'info_dict': {
3122 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
3123 'title': 'Uploads from 12 Minute Athlete',
3124 'uploader': '12 Minute Athlete',
3125 'uploader_id': 'the12minuteathlete',
3126 }
3127 }, {
3128 'url': 'ytuser:phihag',
3129 'only_matching': True,
3130 }, {
3131 'url': 'https://www.youtube.com/c/gametrailers',
3132 'only_matching': True,
3133 }, {
3134 'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
3135 'only_matching': True,
3136 }, {
3137 'url': 'https://www.youtube.com/gametrailers',
3138 'only_matching': True,
3139 }, {
3140 # This channel is not available, geo-restricted to JP
3141 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
3142 'only_matching': True,
3143 }]
3144
3145 @classmethod
3146 def suitable(cls, url):
3147 # Don't return True if the url can be extracted with another youtube
3148 # extractor; the regex is too permissive and it would match.
3149 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
3150 if any(ie.suitable(url) for ie in other_yt_ies):
3151 return False
3152 else:
3153 return super(YoutubeUserIE, cls).suitable(url)
3154
3155 def _build_template_url(self, url, channel_id):
3156 mobj = re.match(self._VALID_URL, url)
3157 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
3158
3159
3160 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
3161 IE_DESC = 'YouTube.com live streams'
3162 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
3163 IE_NAME = 'youtube:live'
3164
3165 _TESTS = [{
3166 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3167 'info_dict': {
3168 'id': 'a48o2S1cPoo',
3169 'ext': 'mp4',
3170 'title': 'The Young Turks - Live Main Show',
3171 'uploader': 'The Young Turks',
3172 'uploader_id': 'TheYoungTurks',
3173 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3174 'upload_date': '20150715',
3175 'license': 'Standard YouTube License',
3176 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3177 'categories': ['News & Politics'],
3178 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3179 'like_count': int,
3180 'dislike_count': int,
3181 },
3182 'params': {
3183 'skip_download': True,
3184 },
3185 }, {
3186 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3187 'only_matching': True,
3188 }, {
3189 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3190 'only_matching': True,
3191 }, {
3192 'url': 'https://www.youtube.com/TheYoungTurks/live',
3193 'only_matching': True,
3194 }]
3195
3196 def _real_extract(self, url):
3197 mobj = re.match(self._VALID_URL, url)
3198 channel_id = mobj.group('id')
3199 base_url = mobj.group('base_url')
3200 webpage = self._download_webpage(url, channel_id, fatal=False)
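# If the /live page resolves to a concrete 11-character video id, hand it to the
# regular video extractor; otherwise fall back to the channel/user page itself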
3201 if webpage:
3202 page_type = self._og_search_property(
3203 'type', webpage, 'page type', default='')
3204 video_id = self._html_search_meta(
3205 'videoId', webpage, 'video id', default=None)
3206 if page_type.startswith('video') and video_id and re.match(
3207 r'^[0-9A-Za-z_-]{11}$', video_id):
3208 return self.url_result(video_id, YoutubeIE.ie_key())
3209 return self.url_result(base_url)
3210
3211
3212 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
3213 IE_DESC = 'YouTube.com user/channel playlists'
3214 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
3215 IE_NAME = 'youtube:playlists'
3216
3217 _TESTS = [{
3218 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3219 'playlist_mincount': 4,
3220 'info_dict': {
3221 'id': 'ThirstForScience',
3222 'title': 'ThirstForScience',
3223 },
3224 }, {
3225 # with "Load more" button
3226 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3227 'playlist_mincount': 70,
3228 'info_dict': {
3229 'id': 'igorkle1',
3230 'title': 'Игорь Клейнер',
3231 },
3232 }, {
3233 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
3234 'playlist_mincount': 17,
3235 'info_dict': {
3236 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
3237 'title': 'Chem Player',
3238 },
3239 'skip': 'Blocked',
3240 }, {
3241 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3242 'only_matching': True,
3243 }]
3244
3245
3246 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
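# Matches watch links in the rendered search results, capturing the 11-character
# video id and, when present, the title attribute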
3247 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3248
3249
3250 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
3251 IE_DESC = 'YouTube.com searches'
3252 # there doesn't appear to be a real limit; for example, searching for
3253 # 'python' yields more than 8,000,000 results
3254 _MAX_RESULTS = float('inf')
3255 IE_NAME = 'youtube:search'
3256 _SEARCH_KEY = 'ytsearch'
3257 _EXTRA_QUERY_ARGS = {}
3258 _TESTS = []
3259
3260 def _get_n_results(self, query, n):
3261 """Get a specified number of results for a query"""
3262
3263 videos = []
3264 limit = n
3265
3266 url_query = {
3267 'search_query': query.encode('utf-8'),
3268 }
3269 url_query.update(self._EXTRA_QUERY_ARGS)
3270 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
3271
3272 for pagenum in itertools.count(1):
3273 data = self._download_json(
3274 result_url, video_id='query "%s"' % query,
3275 note='Downloading page %s' % pagenum,
3276 errnote='Unable to download API page',
3277 query={'spf': 'navigate'})
3278 html_content = data[1]['body']['content']
3279
3280 if 'class="search-message' in html_content:
3281 raise ExtractorError(
3282 '[youtube] No video results', expected=True)
3283
3284 new_videos = list(self._process_page(html_content))
3285 videos += new_videos
3286 if not new_videos or len(videos) > limit:
3287 break
3288 next_link = self._html_search_regex(
3289 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
3290 html_content, 'next link', default=None)
3291 if next_link is None:
3292 break
3293 result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
3294
3295 if len(videos) > n:
3296 videos = videos[:n]
3297 return self.playlist_result(videos, query)
3298
3299
3300 class YoutubeSearchDateIE(YoutubeSearchIE):
3301 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
3302 _SEARCH_KEY = 'ytsearchdate'
3303 IE_DESC = 'YouTube.com searches, newest videos first'
3304 _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
3305
3306
3307 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
3308 IE_DESC = 'YouTube.com search URLs'
3309 IE_NAME = 'youtube:search_url'
3310 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
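# Grabs the ytInitialData JSON blob embedded in the search results page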
3311 _SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
3312 _TESTS = [{
3313 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3314 'playlist_mincount': 5,
3315 'info_dict': {
3316 'title': 'youtube-dl test video',
3317 }
3318 }, {
3319 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3320 'only_matching': True,
3321 }]
3322
3323 def _find_videos_in_json(self, extracted):
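# Recursively walk the parsed ytInitialData and collect every dict carrying a 'videoId' key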
3324 videos = []
3325
3326 def _real_find(obj):
3327 if obj is None or isinstance(obj, compat_str):
3328 return
3329
3330 if type(obj) is list:
3331 for elem in obj:
3332 _real_find(elem)
3333
3334 if type(obj) is dict:
3335 if "videoId" in obj:
3336 videos.append(obj)
3337 return
3338
3339 for _, o in obj.items():
3340 _real_find(o)
3341
3342 _real_find(extracted)
3343
3344 return videos
3345
3346 def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
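# Deduplicates by video id; if an id was already seen without a title,
# a later occurrence that has one fills it in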
3347 search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
3348
3349 result_items = self._find_videos_in_json(search_response)
3350
3351 for renderer in result_items:
3352 video_id = try_get(renderer, lambda x: x['videoId'])
3353 video_title = try_get(renderer, lambda x: x['title']['runs'][0]['text']) or try_get(renderer, lambda x: x['title']['simpleText'])
3354
3355 if video_id is None or video_title is None:
3356 # not a videoRenderer, or the title extraction broke
3357 continue
3358
3359 video_title = video_title.strip()
3360
3361 try:
3362 idx = ids_in_page.index(video_id)
3363 if video_title and not titles_in_page[idx]:
3364 titles_in_page[idx] = video_title
3365 except ValueError:
3366 ids_in_page.append(video_id)
3367 titles_in_page.append(video_title)
3368
3369 def extract_videos_from_page(self, page):
3370 ids_in_page = []
3371 titles_in_page = []
3372 self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
3373 return zip(ids_in_page, titles_in_page)
3374
3375 def _real_extract(self, url):
3376 mobj = re.match(self._VALID_URL, url)
3377 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
3378 webpage = self._download_webpage(url, query)
3379 return self.playlist_result(self._process_page(webpage), playlist_title=query)
3380
3381
3382 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
3383 IE_DESC = 'YouTube.com (multi-season) shows'
3384 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3385 IE_NAME = 'youtube:show'
3386 _TESTS = [{
3387 'url': 'https://www.youtube.com/show/airdisasters',
3388 'playlist_mincount': 5,
3389 'info_dict': {
3390 'id': 'airdisasters',
3391 'title': 'Air Disasters',
3392 }
3393 }]
3394
3395 def _real_extract(self, url):
3396 playlist_id = self._match_id(url)
3397 return super(YoutubeShowIE, self)._real_extract(
3398 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3399
3400
3401 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
3402 """
3403 Base class for feed extractors
3404 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
3405 """
3406 _LOGIN_REQUIRED = True
3407 _FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
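# The ytcfg.set(...) payload carries the client name/version, identity token and
# related values that are sent back as X-YouTube-* headers on continuation requests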
3408 _YTCFG_DATA = r"ytcfg\.set\(({.*?})\)"
3409
3410 @property
3411 def IE_NAME(self):
3412 return 'youtube:%s' % self._FEED_NAME
3413
3414 def _real_initialize(self):
3415 self._login()
3416
3417 def _find_videos_in_json(self, extracted):
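# Same recursive walk as in the search extractor, but also captures the
# nextContinuationData object used for paging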
3418 videos = []
3419 c = {}
3420
3421 def _real_find(obj):
3422 if obj is None or isinstance(obj, compat_str):
3423 return
3424
3425 if type(obj) is list:
3426 for elem in obj:
3427 _real_find(elem)
3428
3429 if type(obj) is dict:
3430 if "videoId" in obj:
3431 videos.append(obj)
3432 return
3433
3434 if "nextContinuationData" in obj:
3435 c["continuation"] = obj["nextContinuationData"]
3436 return
3437
3438 for _, o in obj.items():
3439 _real_find(o)
3440
3441 _real_find(extracted)
3442
3443 return videos, try_get(c, lambda x: x["continuation"])
3444
3445 def _entries(self, page):
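# Parse the initial ytInitialData, then keep following continuation tokens via the
# browse_ajax endpoint, skipping video ids that have already been yielded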
3446 info = []
3447
3448 yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)
3449
3450 search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)
3451
3452 for page_num in itertools.count(1):
3453 video_info, continuation = self._find_videos_in_json(search_response)
3454
3455 new_info = []
3456
3457 for v in video_info:
3458 v_id = try_get(v, lambda x: x['videoId'])
3459 if not v_id:
3460 continue
3461
3462 have_video = False
3463 for old in info:
3464 if old['videoId'] == v_id:
3465 have_video = True
3466 break
3467
3468 if not have_video:
3469 new_info.append(v)
3470
3471 if not new_info:
3472 break
3473
3474 info.extend(new_info)
3475
3476 for video in new_info:
3477 yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=try_get(video, lambda x: x['title']['runs'][0]['text']) or try_get(video, lambda x: x['title']['simpleText']))
3478
3479 if not continuation or not yt_conf:
3480 break
3481
3482 search_response = self._download_json(
3483 'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
3484 'Downloading page #%s' % page_num,
3485 transform_source=uppercase_escape,
3486 query={
3487 "ctoken": try_get(continuation, lambda x: x["continuation"]),
3488 "continuation": try_get(continuation, lambda x: x["continuation"]),
3489 "itct": try_get(continuation, lambda x: x["clickTrackingParams"])
3490 },
3491 headers={
3492 "X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
3493 "X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
3494 "X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
3495 "X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
3496 "X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
3497 "X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
3498 "X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
3499 })
3500
3501 def _real_extract(self, url):
3502 page = self._download_webpage(
3503 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3504 self._PLAYLIST_TITLE)
3505 return self.playlist_result(
3506 self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3507
3508
3509 class YoutubeWatchLaterIE(YoutubePlaylistIE):
3510 IE_NAME = 'youtube:watchlater'
3511 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3512 _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
3513
3514 _TESTS = [{
3515 'url': 'https://www.youtube.com/playlist?list=WL',
3516 'only_matching': True,
3517 }, {
3518 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3519 'only_matching': True,
3520 }]
3521
3522 def _real_extract(self, url):
3523 _, video = self._check_download_just_video(url, 'WL')
3524 if video:
3525 return video
3526 _, playlist = self._extract_playlist('WL')
3527 return playlist
3528
3529
3530 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3531 IE_NAME = 'youtube:favorites'
3532 IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3533 _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
3534 _LOGIN_REQUIRED = True
3535
3536 def _real_extract(self, url):
3537 webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
3538 playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
3539 return self.url_result(playlist_id, 'YoutubePlaylist')
3540
3541
3542 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3543 IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3544 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3545 _FEED_NAME = 'recommended'
3546 _PLAYLIST_TITLE = 'Youtube Recommended videos'
3547
3548
3549 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3550 IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
3551 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3552 _FEED_NAME = 'subscriptions'
3553 _PLAYLIST_TITLE = 'Youtube Subscriptions'
3554
3555
3556 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3557 IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3558 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3559 _FEED_NAME = 'history'
3560 _PLAYLIST_TITLE = 'Youtube History'
3561
3562
3563 class YoutubeTruncatedURLIE(InfoExtractor):
3564 IE_NAME = 'youtube:truncated_url'
3565 IE_DESC = False # Do not list
3566 _VALID_URL = r'''(?x)
3567 (?:https?://)?
3568 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3569 (?:watch\?(?:
3570 feature=[a-z_]+|
3571 annotation_id=annotation_[^&]+|
3572 x-yt-cl=[0-9]+|
3573 hl=[^&]*|
3574 t=[0-9]+
3575 )?
3576 |
3577 attribution_link\?a=[^&]+
3578 )
3579 $
3580 '''
3581
3582 _TESTS = [{
3583 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3584 'only_matching': True,
3585 }, {
3586 'url': 'https://www.youtube.com/watch?',
3587 'only_matching': True,
3588 }, {
3589 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3590 'only_matching': True,
3591 }, {
3592 'url': 'https://www.youtube.com/watch?feature=foo',
3593 'only_matching': True,
3594 }, {
3595 'url': 'https://www.youtube.com/watch?hl=en-GB',
3596 'only_matching': True,
3597 }, {
3598 'url': 'https://www.youtube.com/watch?t=2372',
3599 'only_matching': True,
3600 }]
3601
3602 def _real_extract(self, url):
3603 raise ExtractorError(
3604 'Did you forget to quote the URL? Remember that & is a meta '
3605 'character in most shells, so you want to put the URL in quotes, '
3606 'like youtube-dl '
3607 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3608 'or simply youtube-dl BaW_jenozKc.',
3609 expected=True)
3610
3611
3612 class YoutubeTruncatedIDIE(InfoExtractor):
3613 IE_NAME = 'youtube:truncated_id'
3614 IE_DESC = False # Do not list
3615 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3616
3617 _TESTS = [{
3618 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3619 'only_matching': True,
3620 }]
3621
3622 def _real_extract(self, url):
3623 video_id = self._match_id(url)
3624 raise ExtractorError(
3625 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
3626 expected=True)