youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_to_int,
  45     try_get,
  46     unescapeHTML,
  47     unified_strdate,
  48     unsmuggle_url,
  49     uppercase_escape,
  50     urlencode_postdata,
  51 )
  52
  53
  54 class YoutubeBaseInfoExtractor(InfoExtractor):
  55     """Provide base functions for Youtube extractors"""
  56     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  57     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  58
  59     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  60     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  61     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  62
  63     _NETRC_MACHINE = 'youtube'
  64     # If True it will raise an error if no login info is provided
  65     _LOGIN_REQUIRED = False
  66
  67     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  68
  69     def _set_language(self):
  70         self._set_cookie(
  71             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  72             # YouTube sets the expire time to about two months
  73             expire_time=time.time() + 2 * 30 * 24 * 3600)
  74
  75     def _ids_to_results(self, ids):
  76         return [
  77             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  78             for vid_id in ids]
  79
  80     def _login(self):
  81         """
  82         Attempt to log in to YouTube.
  83         True is returned if successful or skipped.
  84         False is returned if login failed.
  85
  86         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  87         """
  88         username, password = self._get_login_info()
  89         # No authentication to be performed
  90         if username is None:
  91             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  92                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  93             return True
  94
  95         login_page = self._download_webpage(
  96             self._LOGIN_URL, None,
  97             note='Downloading login page',
  98             errnote='unable to fetch login page', fatal=False)
  99         if login_page is False:
 100             return
 101
 102         login_form = self._hidden_inputs(login_page)
 103
 104         def req(url, f_req, note, errnote):
 105             data = login_form.copy()
 106             data.update({
 107                 'pstMsg': 1,
 108                 'checkConnection': 'youtube',
 109                 'checkedDomains': 'youtube',
 110                 'hl': 'en',
 111                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 112                 'f.req': json.dumps(f_req),
 113                 'flowName': 'GlifWebSignIn',
 114                 'flowEntry': 'ServiceLogin',
 115             })
 116             return self._download_json(
 117                 url, None, note=note, errnote=errnote,
 118                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 119                 fatal=False,
 120                 data=urlencode_postdata(data), headers={
 121                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 122                     'Google-Accounts-XSRF': 1,
 123                 })
 124
 125         def warn(message):
 126             self._downloader.report_warning(message)
 127
 128         lookup_req = [
 129             username,
 130             None, [], None, 'US', None, None, 2, False, True,
 131             [
 132                 None, None,
 133                 [2, 1, None, 1,
 134                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 135                  None, [], 4],
 136                 1, [None, None, []], None, None, None, True
 137             ],
 138             username,
 139         ]
 140
 141         lookup_results = req(
 142             self._LOOKUP_URL, lookup_req,
 143             'Looking up account info', 'Unable to look up account info')
 144
 145         if lookup_results is False:
 146             return False
 147
 148         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 149         if not user_hash:
 150             warn('Unable to extract user hash')
 151             return False
 152
 153         challenge_req = [
 154             user_hash,
 155             None, 1, None, [1, None, None, None, [password, None, True]],
 156             [
 157                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 158                 1, [None, None, []], None, None, None, True
 159             ]]
 160
 161         challenge_results = req(
 162             self._CHALLENGE_URL, challenge_req,
 163             'Logging in', 'Unable to log in')
 164
 165         if challenge_results is False:
 166             return
 167
 168         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 169         if login_res:
 170             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 171             warn(
 172                 'Unable to login: %s' % 'Invalid password'
 173                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 174             return False
 175
 176         res = try_get(challenge_results, lambda x: x[0][-1], list)
 177         if not res:
 178             warn('Unable to extract result entry')
 179             return False
 180
 181         login_challenge = try_get(res, lambda x: x[0][0], list)
 182         if login_challenge:
 183             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 184             if challenge_str == 'TWO_STEP_VERIFICATION':
 185                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 186                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 187                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 188                 if status == 'QUOTA_EXCEEDED':
 189                     warn('Exceeded the limit of TFA codes, try later')
 190                     return False
 191
 192                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 193                 if not tl:
 194                     warn('Unable to extract TL')
 195                     return False
 196
 197                 tfa_code = self._get_tfa_info('2-step verification code')
 198
 199                 if not tfa_code:
 200                     warn(
 201                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 202                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 203                     return False
 204
 205                 tfa_code = remove_start(tfa_code, 'G-')
 206
 207                 tfa_req = [
 208                     user_hash, None, 2, None,
 209                     [
 210                         9, None, None, None, None, None, None, None,
 211                         [None, tfa_code, True, 2]
 212                     ]]
 213
 214                 tfa_results = req(
 215                     self._TFA_URL.format(tl), tfa_req,
 216                     'Submitting TFA code', 'Unable to submit TFA code')
 217
 218                 if tfa_results is False:
 219                     return False
 220
 221                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 222                 if tfa_res:
 223                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 224                     warn(
 225                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 226                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 227                     return False
 228
 229                 check_cookie_url = try_get(
 230                     tfa_results, lambda x: x[0][-1][2], compat_str)
 231             else:
 232                 CHALLENGES = {
 233                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 234                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 235                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 236                 }
 237                 challenge = CHALLENGES.get(
 238                     challenge_str,
 239                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 240                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 241                 return False
 242         else:
 243             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 244
 245         if not check_cookie_url:
 246             warn('Unable to extract CheckCookie URL')
 247             return False
 248
 249         check_cookie_results = self._download_webpage(
 250             check_cookie_url, None, 'Checking cookie', fatal=False)
 251
 252         if check_cookie_results is False:
 253             return False
 254
 255         if 'https://myaccount.google.com/' not in check_cookie_results:
 256             warn('Unable to log in')
 257             return False
 258
 259         return True
 260
 261     def _download_webpage_handle(self, *args, **kwargs):
 262         query = kwargs.get('query', {}).copy()
 263         query['disable_polymer'] = 'true'
 264         kwargs['query'] = query
 265         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 266             *args, **compat_kwargs(kwargs))
 267
 268     def _real_initialize(self):
 269         if self._downloader is None:
 270             return
 271         self._set_language()
 272         if not self._login():
 273             return
 274
 275
 276 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 277     # Extract entries from page with "Load more" button
 278     def _entries(self, page, playlist_id):
 279         more_widget_html = content_html = page
 280         for page_num in itertools.count(1):
 281             for entry in self._process_page(content_html):
 282                 yield entry
 283
 284             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 285             if not mobj:
 286                 break
 287
 288             more = self._download_json(
 289                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 290                 'Downloading page #%s' % page_num,
 291                 transform_source=uppercase_escape)
 292             content_html = more['content_html']
 293             if not content_html.strip():
 294                 # Some webpages show a "Load more" button but they don't
 295                 # have more videos
 296                 break
 297             more_widget_html = more['load_more_widget_html']
 298
 299
 300 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 301     def _process_page(self, content):
 302         for video_id, video_title in self.extract_videos_from_page(content):
 303             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 304
 305     def extract_videos_from_page(self, page):
 306         ids_in_page = []
 307         titles_in_page = []
 308         for mobj in re.finditer(self._VIDEO_RE, page):
 309             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 310             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 311                 continue
 312             video_id = mobj.group('id')
 313             video_title = unescapeHTML(mobj.group('title'))
 314             if video_title:
 315                 video_title = video_title.strip()
 316             try:
 317                 idx = ids_in_page.index(video_id)
 318                 if video_title and not titles_in_page[idx]:
 319                     titles_in_page[idx] = video_title
 320             except ValueError:
 321                 ids_in_page.append(video_id)
 322                 titles_in_page.append(video_title)
 323         return zip(ids_in_page, titles_in_page)
 324
 325
 326 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 327     def _process_page(self, content):
 328         for playlist_id in orderedSet(re.findall(
 329                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 330                 content)):
 331             yield self.url_result(
 332                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 333
 334     def _real_extract(self, url):
 335         playlist_id = self._match_id(url)
 336         webpage = self._download_webpage(url, playlist_id)
 337         title = self._og_search_title(webpage, fatal=False)
 338         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 339
 340
 341 class YoutubeIE(YoutubeBaseInfoExtractor):
 342     IE_DESC = 'YouTube.com'
 343     _VALID_URL = r"""(?x)^
 344                      (
 345                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 346                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 347                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 348                             (?:www\.)?pwnyoutube\.com/|
 349                             (?:www\.)?hooktube\.com/|
 350                             (?:www\.)?yourepeat\.com/|
 351                             tube\.majestyc\.net/|
 352                             (?:www\.)?invidio\.us/|
 353                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 354                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 355                          (?:                                                  # the various things that can precede the ID:
 356                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 357                              |(?:                                             # or the v= param in all its forms
 358                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 359                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 360                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 361                                  v=
 362                              )
 363                          ))
 364                          |(?:
 365                             youtu\.be|                                        # just youtu.be/xxxx
 366                             vid\.plus|                                        # or vid.plus/xxxx
 367                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 368                          )/
 369                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 370                          )
 371                      )?                                                       # all until now is optional -> you can pass the naked ID
 372                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 373                      (?!.*?\blist=
 374                         (?:
 375                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 376                             WL                                                # WL are handled by the watch later IE
 377                         )
 378                      )
 379                      (?(1).+)?                                                # if we found the ID, everything can follow
 380                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 381     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 382     _formats = {
 383         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 384         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 385         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 386         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 387         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 388         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 389         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 390         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 391         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 392         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 393         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 394         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 395         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 396         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 397         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 398         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 399         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 400         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 401
 402
 403         # 3D videos
 404         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 405         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 406         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 407         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 408         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 409         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 410         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 411
 412         # Apple HTTP Live Streaming
 413         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 414         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 415         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 416         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 417         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 418         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 419         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 420         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 421
 422         # DASH mp4 video
 423         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 424         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 425         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 426         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 427         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 428         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 429         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 430         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 431         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 432         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 433         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 434         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 435
 436         # Dash mp4 audio
 437         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 438         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 439         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 440         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 441         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 442         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 443         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 444
 445         # Dash webm
 446         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 447         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 448         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 449         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 450         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 451         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 452         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 453         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 454         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 455         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 456         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 457         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 458         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 459         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 460         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 461         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 462         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 463         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 464         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 465         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 466         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 467         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 468
 469         # Dash webm audio
 470         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 471         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 472
 473         # Dash webm audio with opus inside
 474         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 475         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 476         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 477
 478         # RTMP (unnamed)
 479         '_rtmp': {'protocol': 'rtmp'},
 480     }
 481     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 482
 483     _GEO_BYPASS = False
 484
 485     IE_NAME = 'youtube'
 486     _TESTS = [
 487         {
 488             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 489             'info_dict': {
 490                 'id': 'BaW_jenozKc',
 491                 'ext': 'mp4',
 492                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 493                 'uploader': 'Philipp Hagemeister',
 494                 'uploader_id': 'phihag',
 495                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 496                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 497                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 498                 'upload_date': '20121002',
 499                 'license': 'Standard YouTube License',
 500                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 501                 'categories': ['Science & Technology'],
 502                 'tags': ['youtube-dl'],
 503                 'duration': 10,
 504                 'like_count': int,
 505                 'dislike_count': int,
 506                 'start_time': 1,
 507                 'end_time': 9,
 508             }
 509         },
 510         {
 511             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 512             'note': 'Test generic use_cipher_signature video (#897)',
 513             'info_dict': {
 514                 'id': 'UxxajLWwzqY',
 515                 'ext': 'mp4',
 516                 'upload_date': '20120506',
 517                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 518                 'alt_title': 'I Love It (feat. Charli XCX)',
 519                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 520                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 521                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 522                          'iconic ep', 'iconic', 'love', 'it'],
 523                 'duration': 180,
 524                 'uploader': 'Icona Pop',
 525                 'uploader_id': 'IconaPop',
 526                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 527                 'license': 'Standard YouTube License',
 528                 'creator': 'Icona Pop',
 529                 'track': 'I Love It (feat. Charli XCX)',
 530                 'artist': 'Icona Pop',
 531             }
 532         },
 533         {
 534             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 535             'note': 'Test VEVO video with age protection (#956)',
 536             'info_dict': {
 537                 'id': '07FYdnEawAQ',
 538                 'ext': 'mp4',
 539                 'upload_date': '20130703',
 540                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 541                 'alt_title': 'Tunnel Vision',
 542                 'description': 'md5:64249768eec3bc4276236606ea996373',
 543                 'duration': 419,
 544                 'uploader': 'justintimberlakeVEVO',
 545                 'uploader_id': 'justintimberlakeVEVO',
 546                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 547                 'license': 'Standard YouTube License',
 548                 'creator': 'Justin Timberlake',
 549                 'track': 'Tunnel Vision',
 550                 'artist': 'Justin Timberlake',
 551                 'age_limit': 18,
 552             }
 553         },
 554         {
 555             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 556             'note': 'Embed-only video (#1746)',
 557             'info_dict': {
 558                 'id': 'yZIXLfi8CZQ',
 559                 'ext': 'mp4',
 560                 'upload_date': '20120608',
 561                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 562                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 563                 'uploader': 'SET India',
 564                 'uploader_id': 'setindia',
 565                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 566                 'license': 'Standard YouTube License',
 567                 'age_limit': 18,
 568             }
 569         },
 570         {
 571             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 572             'note': 'Use the first video ID in the URL',
 573             'info_dict': {
 574                 'id': 'BaW_jenozKc',
 575                 'ext': 'mp4',
 576                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 577                 'uploader': 'Philipp Hagemeister',
 578                 'uploader_id': 'phihag',
 579                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 580                 'upload_date': '20121002',
 581                 'license': 'Standard YouTube License',
 582                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 583                 'categories': ['Science & Technology'],
 584                 'tags': ['youtube-dl'],
 585                 'duration': 10,
 586                 'like_count': int,
 587                 'dislike_count': int,
 588             },
 589             'params': {
 590                 'skip_download': True,
 591             },
 592         },
 593         {
 594             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 595             'note': '256k DASH audio (format 141) via DASH manifest',
 596             'info_dict': {
 597                 'id': 'a9LDPn-MO4I',
 598                 'ext': 'm4a',
 599                 'upload_date': '20121002',
 600                 'uploader_id': '8KVIDEO',
 601                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 602                 'description': '',
 603                 'uploader': '8KVIDEO',
 604                 'license': 'Standard YouTube License',
 605                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 606             },
 607             'params': {
 608                 'youtube_include_dash_manifest': True,
 609                 'format': '141',
 610             },
 611             'skip': 'format 141 not served anymore',
 612         },
 613         # DASH manifest with encrypted signature
 614         {
 615             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 616             'info_dict': {
 617                 'id': 'IB3lcPjvWLA',
 618                 'ext': 'm4a',
 619                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 620                 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
 621                 'duration': 244,
 622                 'uploader': 'AfrojackVEVO',
 623                 'uploader_id': 'AfrojackVEVO',
 624                 'upload_date': '20131011',
 625                 'license': 'Standard YouTube License',
 626             },
 627             'params': {
 628                 'youtube_include_dash_manifest': True,
 629                 'format': '141/bestaudio[ext=m4a]',
 630             },
 631         },
 632         # JS player signature function name containing $
 633         {
 634             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 635             'info_dict': {
 636                 'id': 'nfWlot6h_JM',
 637                 'ext': 'm4a',
 638                 'title': 'Taylor Swift - Shake It Off',
 639                 'alt_title': 'Shake It Off',
 640                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 641                 'duration': 242,
 642                 'uploader': 'TaylorSwiftVEVO',
 643                 'uploader_id': 'TaylorSwiftVEVO',
 644                 'upload_date': '20140818',
 645                 'license': 'Standard YouTube License',
 646                 'creator': 'Taylor Swift',
 647             },
 648             'params': {
 649                 'youtube_include_dash_manifest': True,
 650                 'format': '141/bestaudio[ext=m4a]',
 651             },
 652         },
 653         # Controversy video
 654         {
 655             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 656             'info_dict': {
 657                 'id': 'T4XJQO3qol8',
 658                 'ext': 'mp4',
 659                 'duration': 219,
 660                 'upload_date': '20100909',
 661                 'uploader': 'TJ Kirk',
 662                 'uploader_id': 'TheAmazingAtheist',
 663                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 664                 'license': 'Standard YouTube License',
 665                 'title': 'Burning Everyone\'s Koran',
 666                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 667             }
 668         },
 669         # Normal age-gate video (No vevo, embed allowed)
 670         {
 671             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 672             'info_dict': {
 673                 'id': 'HtVdAasjOgU',
 674                 'ext': 'mp4',
 675                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 676                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 677                 'duration': 142,
 678                 'uploader': 'The Witcher',
 679                 'uploader_id': 'WitcherGame',
 680                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 681                 'upload_date': '20140605',
 682                 'license': 'Standard YouTube License',
 683                 'age_limit': 18,
 684             },
 685         },
 686         # Age-gate video with encrypted signature
 687         {
 688             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 689             'info_dict': {
 690                 'id': '6kLq3WMV1nU',
 691                 'ext': 'webm',
 692                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 693                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 694                 'duration': 246,
 695                 'uploader': 'LloydVEVO',
 696                 'uploader_id': 'LloydVEVO',
 697                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 698                 'upload_date': '20110629',
 699                 'license': 'Standard YouTube License',
 700                 'age_limit': 18,
 701             },
 702         },
 703         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 704         # YouTube Red ad is not captured for creator
 705         {
 706             'url': '__2ABJjxzNo',
 707             'info_dict': {
 708                 'id': '__2ABJjxzNo',
 709                 'ext': 'mp4',
 710                 'duration': 266,
 711                 'upload_date': '20100430',
 712                 'uploader_id': 'deadmau5',
 713                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 714                 'creator': 'deadmau5',
 715                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 716                 'uploader': 'deadmau5',
 717                 'license': 'Standard YouTube License',
 718                 'title': 'Deadmau5 - Some Chords (HD)',
 719                 'alt_title': 'Some Chords',
 720             },
 721             'expected_warnings': [
 722                 'DASH manifest missing',
 723             ]
 724         },
 725         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 726         {
 727             'url': 'lqQg6PlCWgI',
 728             'info_dict': {
 729                 'id': 'lqQg6PlCWgI',
 730                 'ext': 'mp4',
 731                 'duration': 6085,
 732                 'upload_date': '20150827',
 733                 'uploader_id': 'olympic',
 734                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 735                 'license': 'Standard YouTube License',
 736                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 737                 'uploader': 'Olympic',
 738                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 739             },
 740             'params': {
 741                 'skip_download': 'requires avconv',
 742             }
 743         },
 744         # Non-square pixels
 745         {
 746             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 747             'info_dict': {
 748                 'id': '_b-2C3KPAM0',
 749                 'ext': 'mp4',
 750                 'stretched_ratio': 16 / 9.,
 751                 'duration': 85,
 752                 'upload_date': '20110310',
 753                 'uploader_id': 'AllenMeow',
 754                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 755                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 756                 'uploader': '孫ᄋᄅ',
 757                 'license': 'Standard YouTube License',
 758                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 759             },
 760         },
 761         # url_encoded_fmt_stream_map is empty string
 762         {
 763             'url': 'qEJwOuvDf7I',
 764             'info_dict': {
 765                 'id': 'qEJwOuvDf7I',
 766                 'ext': 'webm',
 767                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 768                 'description': '',
 769                 'upload_date': '20150404',
 770                 'uploader_id': 'spbelect',
 771                 'uploader': 'Наблюдатели Петербурга',
 772             },
 773             'params': {
 774                 'skip_download': 'requires avconv',
 775             },
 776             'skip': 'This live event has ended.',
 777         },
 778         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 779         {
 780             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 781             'info_dict': {
 782                 'id': 'FIl7x6_3R5Y',
 783                 'ext': 'webm',
 784                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 785                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 786                 'duration': 220,
 787                 'upload_date': '20150625',
 788                 'uploader_id': 'dorappi2000',
 789                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 790                 'uploader': 'dorappi2000',
 791                 'license': 'Standard YouTube License',
 792                 'formats': 'mincount:31',
 793             },
 794             'skip': 'not actual anymore',
 795         },
 796         # DASH manifest with segment_list
 797         {
 798             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 799             'md5': '8ce563a1d667b599d21064e982ab9e31',
 800             'info_dict': {
 801                 'id': 'CsmdDsKjzN8',
 802                 'ext': 'mp4',
 803                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 804                 'uploader': 'Airtek',
 805                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 806                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 807                 'license': 'Standard YouTube License',
 808                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 809             },
 810             'params': {
 811                 'youtube_include_dash_manifest': True,
 812                 'format': '135',  # bestvideo
 813             },
 814             'skip': 'This live event has ended.',
 815         },
 816         {
 817             # Multifeed videos (multiple cameras), URL is for Main Camera
 818             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 819             'info_dict': {
 820                 'id': 'jqWvoWXjCVs',
 821                 'title': 'teamPGP: Rocket League Noob Stream',
 822                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 823             },
 824             'playlist': [{
 825                 'info_dict': {
 826                     'id': 'jqWvoWXjCVs',
 827                     'ext': 'mp4',
 828                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 829                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 830                     'duration': 7335,
 831                     'upload_date': '20150721',
 832                     'uploader': 'Beer Games Beer',
 833                     'uploader_id': 'beergamesbeer',
 834                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 835                     'license': 'Standard YouTube License',
 836                 },
 837             }, {
 838                 'info_dict': {
 839                     'id': '6h8e8xoXJzg',
 840                     'ext': 'mp4',
 841                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 842                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 843                     'duration': 7337,
 844                     'upload_date': '20150721',
 845                     'uploader': 'Beer Games Beer',
 846                     'uploader_id': 'beergamesbeer',
 847                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 848                     'license': 'Standard YouTube License',
 849                 },
 850             }, {
 851                 'info_dict': {
 852                     'id': 'PUOgX5z9xZw',
 853                     'ext': 'mp4',
 854                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 855                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 856                     'duration': 7337,
 857                     'upload_date': '20150721',
 858                     'uploader': 'Beer Games Beer',
 859                     'uploader_id': 'beergamesbeer',
 860                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 861                     'license': 'Standard YouTube License',
 862                 },
 863             }, {
 864                 'info_dict': {
 865                     'id': 'teuwxikvS5k',
 866                     'ext': 'mp4',
 867                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 868                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 869                     'duration': 7334,
 870                     'upload_date': '20150721',
 871                     'uploader': 'Beer Games Beer',
 872                     'uploader_id': 'beergamesbeer',
 873                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 874                     'license': 'Standard YouTube License',
 875                 },
 876             }],
 877             'params': {
 878                 'skip_download': True,
 879             },
 880         },
 881         {
 882             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 883             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 884             'info_dict': {
 885                 'id': 'gVfLd0zydlo',
 886                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 887             },
 888             'playlist_count': 2,
 889             'skip': 'Not multifeed anymore',
 890         },
 891         {
 892             'url': 'https://vid.plus/FlRa-iH7PGw',
 893             'only_matching': True,
 894         },
 895         {
 896             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 897             'only_matching': True,
 898         },
 899         {
 900             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 901             # Also tests cut-off URL expansion in video description (see
 902             # https://github.com/rg3/youtube-dl/issues/1892,
 903             # https://github.com/rg3/youtube-dl/issues/8164)
 904             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 905             'info_dict': {
 906                 'id': 'lsguqyKfVQg',
 907                 'ext': 'mp4',
 908                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 909                 'alt_title': 'Dark Walk - Position Music',
 910                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 911                 'duration': 133,
 912                 'upload_date': '20151119',
 913                 'uploader_id': 'IronSoulElf',
 914                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 915                 'uploader': 'IronSoulElf',
 916                 'license': 'Standard YouTube License',
 917                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 918                 'track': 'Dark Walk - Position Music',
 919                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 920             },
 921             'params': {
 922                 'skip_download': True,
 923             },
 924         },
 925         {
 926             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 927             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 928             'only_matching': True,
 929         },
 930         {
 931             # Video with yt:stretch=17:0
 932             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 933             'info_dict': {
 934                 'id': 'Q39EVAstoRM',
 935                 'ext': 'mp4',
 936                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 937                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 938                 'upload_date': '20151107',
 939                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 940                 'uploader': 'CH GAMER DROID',
 941             },
 942             'params': {
 943                 'skip_download': True,
 944             },
 945             'skip': 'This video does not exist.',
 946         },
 947         {
 948             # Video licensed under Creative Commons
 949             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 950             'info_dict': {
 951                 'id': 'M4gD1WSo5mA',
 952                 'ext': 'mp4',
 953                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 954                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 955                 'duration': 721,
 956                 'upload_date': '20150127',
 957                 'uploader_id': 'BerkmanCenter',
 958                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 959                 'uploader': 'The Berkman Klein Center for Internet & Society',
 960                 'license': 'Creative Commons Attribution license (reuse allowed)',
 961             },
 962             'params': {
 963                 'skip_download': True,
 964             },
 965         },
 966         {
 967             # Channel-like uploader_url
 968             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 969             'info_dict': {
 970                 'id': 'eQcmzGIKrzg',
 971                 'ext': 'mp4',
 972                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 973                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 974                 'duration': 4060,
 975                 'upload_date': '20151119',
 976                 'uploader': 'Bernie Sanders',
 977                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 978                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 979                 'license': 'Creative Commons Attribution license (reuse allowed)',
 980             },
 981             'params': {
 982                 'skip_download': True,
 983             },
 984         },
 985         {
 986             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 987             'only_matching': True,
 988         },
 989         {
 990             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
 991             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 992             'only_matching': True,
 993         },
 994         {
 995             # Rental video preview
 996             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 997             'info_dict': {
 998                 'id': 'uGpuVWrhIzE',
 999                 'ext': 'mp4',
1000                 'title': 'Piku - Trailer',
1001                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1002                 'upload_date': '20150811',
1003                 'uploader': 'FlixMatrix',
1004                 'uploader_id': 'FlixMatrixKaravan',
1005                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1006                 'license': 'Standard YouTube License',
1007             },
1008             'params': {
1009                 'skip_download': True,
1010             },
1011             'skip': 'This video is not available.',
1012         },
1013         {
1014             # YouTube Red video with episode data
1015             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1016             'info_dict': {
1017                 'id': 'iqKdEhx-dD4',
1018                 'ext': 'mp4',
1019                 'title': 'Isolation - Mind Field (Ep 1)',
1020                 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1021                 'duration': 2085,
1022                 'upload_date': '20170118',
1023                 'uploader': 'Vsauce',
1024                 'uploader_id': 'Vsauce',
1025                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1026                 'license': 'Standard YouTube License',
1027                 'series': 'Mind Field',
1028                 'season_number': 1,
1029                 'episode_number': 1,
1030             },
1031             'params': {
1032                 'skip_download': True,
1033             },
1034             'expected_warnings': [
1035                 'Skipping DASH manifest',
1036             ],
1037         },
1038         {
1039             # The following content has been identified by the YouTube community
1040             # as inappropriate or offensive to some audiences.
1041             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1042             'info_dict': {
1043                 'id': '6SJNVb0GnPI',
1044                 'ext': 'mp4',
1045                 'title': 'Race Differences in Intelligence',
1046                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1047                 'duration': 965,
1048                 'upload_date': '20140124',
1049                 'uploader': 'New Century Foundation',
1050                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1051                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1052                 'license': 'Standard YouTube License',
1053             },
1054             'params': {
1055                 'skip_download': True,
1056             },
1057         },
1058         {
1059             # itag 212
1060             'url': '1t24XAntNCY',
1061             'only_matching': True,
1062         },
1063         {
1064             # geo restricted to JP
1065             'url': 'sJL6WA-aGkQ',
1066             'only_matching': True,
1067         },
1068         {
1069             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1070             'only_matching': True,
1071         },
1072         {
1073             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1074             'only_matching': True,
1075         },
1076     ]
1077
1078     def __init__(self, *args, **kwargs):
1079         super(YoutubeIE, self).__init__(*args, **kwargs)
1080         self._player_cache = {}
1081
1082     def report_video_info_webpage_download(self, video_id):
1083         """Report attempt to download video info webpage."""
1084         self.to_screen('%s: Downloading video info webpage' % video_id)
1085
1086     def report_information_extraction(self, video_id):
1087         """Report attempt to extract video information."""
1088         self.to_screen('%s: Extracting video information' % video_id)
1089
1090     def report_unavailable_format(self, video_id, format):
1091         """Report extracted video URL."""
1092         self.to_screen('%s: Format %s not available' % (video_id, format))
1093
1094     def report_rtmp_download(self):
1095         """Indicate the download will use the RTMP protocol."""
1096         self.to_screen('RTMP download detected')
1097
1098     def _signature_cache_id(self, example_sig):
1099         """ Return a string representation of a signature """
1100         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1101
1102     def _extract_signature_function(self, video_id, player_url, example_sig):
1103         id_m = re.match(
1104             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1105             player_url)
1106         if not id_m:
1107             raise ExtractorError('Cannot identify player %r' % player_url)
1108         player_type = id_m.group('ext')
1109         player_id = id_m.group('id')
1110
1111         # Read from filesystem cache
1112         func_id = '%s_%s_%s' % (
1113             player_type, player_id, self._signature_cache_id(example_sig))
1114         assert os.path.basename(func_id) == func_id
1115
1116         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1117         if cache_spec is not None:
1118             return lambda s: ''.join(s[i] for i in cache_spec)
1119
1120         download_note = (
1121             'Downloading player %s' % player_url
1122             if self._downloader.params.get('verbose') else
1123             'Downloading %s player %s' % (player_type, player_id)
1124         )
1125         if player_type == 'js':
1126             code = self._download_webpage(
1127                 player_url, video_id,
1128                 note=download_note,
1129                 errnote='Download of %s failed' % player_url)
1130             res = self._parse_sig_js(code)
1131         elif player_type == 'swf':
1132             urlh = self._request_webpage(
1133                 player_url, video_id,
1134                 note=download_note,
1135                 errnote='Download of %s failed' % player_url)
1136             code = urlh.read()
1137             res = self._parse_sig_swf(code)
1138         else:
1139             assert False, 'Invalid player type %r' % player_type
1140
1141         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1142         cache_res = res(test_string)
1143         cache_spec = [ord(c) for c in cache_res]
1144
1145         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1146         return res
1147
1148     def _print_sig_code(self, func, example_sig):
1149         def gen_sig_code(idxs):
1150             def _genslice(start, end, step):
1151                 starts = '' if start == 0 else str(start)
1152                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1153                 steps = '' if step == 1 else (':%d' % step)
1154                 return 's[%s%s%s]' % (starts, ends, steps)
1155
1156             step = None
1157             # Quelch pyflakes warnings - start will be set when step is set
1158             start = '(Never used)'
1159             for i, prev in zip(idxs[1:], idxs[:-1]):
1160                 if step is not None:
1161                     if i - prev == step:
1162                         continue
1163                     yield _genslice(start, prev, step)
1164                     step = None
1165                     continue
1166                 if i - prev in [-1, 1]:
1167                     step = i - prev
1168                     start = prev
1169                     continue
1170                 else:
1171                     yield 's[%d]' % prev
1172             if step is None:
1173                 yield 's[%d]' % i
1174             else:
1175                 yield _genslice(start, i, step)
1176
1177         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1178         cache_res = func(test_string)
1179         cache_spec = [ord(c) for c in cache_res]
1180         expr_code = ' + '.join(gen_sig_code(cache_spec))
1181         signature_id_tuple = '(%s)' % (
1182             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1183         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1184                 '    return %s\n') % (signature_id_tuple, expr_code)
1185         self.to_screen('Extracted signature function:\n' + code)
1186
1187     def _parse_sig_js(self, jscode):
1188         funcname = self._search_regex(
1189             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1190              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1191              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1192              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1193             jscode, 'Initial JS player signature function name', group='sig')
1194
1195         jsi = JSInterpreter(jscode)
1196         initial_function = jsi.extract_function(funcname)
1197         return lambda s: initial_function([s])
1198
1199     def _parse_sig_swf(self, file_contents):
1200         swfi = SWFInterpreter(file_contents)
1201         TARGET_CLASSNAME = 'SignatureDecipher'
1202         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1203         initial_function = swfi.extract_function(searched_class, 'decipher')
1204         return lambda s: initial_function([s])
1205
1206     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1207         """Turn the encrypted s field into a working signature"""
1208
1209         if player_url is None:
1210             raise ExtractorError('Cannot decrypt signature without player_url')
1211
1212         if player_url.startswith('//'):
1213             player_url = 'https:' + player_url
1214         elif not re.match(r'https?://', player_url):
1215             player_url = compat_urlparse.urljoin(
1216                 'https://www.youtube.com', player_url)
1217         try:
1218             player_id = (player_url, self._signature_cache_id(s))
1219             if player_id not in self._player_cache:
1220                 func = self._extract_signature_function(
1221                     video_id, player_url, s
1222                 )
1223                 self._player_cache[player_id] = func
1224             func = self._player_cache[player_id]
1225             if self._downloader.params.get('youtube_print_sig_code'):
1226                 self._print_sig_code(func, s)
1227             return func(s)
1228         except Exception as e:
1229             tb = traceback.format_exc()
1230             raise ExtractorError(
1231                 'Signature extraction failed: ' + tb, cause=e)
1232
1233     def _get_subtitles(self, video_id, webpage):
1234         try:
1235             subs_doc = self._download_xml(
1236                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1237                 video_id, note=False)
1238         except ExtractorError as err:
1239             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1240             return {}
1241
1242         sub_lang_list = {}
1243         for track in subs_doc.findall('track'):
1244             lang = track.attrib['lang_code']
1245             if lang in sub_lang_list:
1246                 continue
1247             sub_formats = []
1248             for ext in self._SUBTITLE_FORMATS:
1249                 params = compat_urllib_parse_urlencode({
1250                     'lang': lang,
1251                     'v': video_id,
1252                     'fmt': ext,
1253                     'name': track.attrib['name'].encode('utf-8'),
1254                 })
1255                 sub_formats.append({
1256                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1257                     'ext': ext,
1258                 })
1259             sub_lang_list[lang] = sub_formats
1260         if not sub_lang_list:
1261             self._downloader.report_warning('video doesn\'t have subtitles')
1262             return {}
1263         return sub_lang_list
1264
1265     def _get_ytplayer_config(self, video_id, webpage):
1266         patterns = (
1267             # User data may contain arbitrary character sequences that may affect
1268             # JSON extraction with regex, e.g. when '};' is contained the second
1269             # regex won't capture the whole JSON. Yet working around by trying more
1270             # concrete regex first keeping in mind proper quoted string handling
1271             # to be implemented in future that will replace this workaround (see
1272             # https://github.com/rg3/youtube-dl/issues/7468,
1273             # https://github.com/rg3/youtube-dl/pull/7599)
1274             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1275             r';ytplayer\.config\s*=\s*({.+?});',
1276         )
1277         config = self._search_regex(
1278             patterns, webpage, 'ytplayer.config', default=None)
1279         if config:
1280             return self._parse_json(
1281                 uppercase_escape(config), video_id, fatal=False)
1282
1283     def _get_automatic_captions(self, video_id, webpage):
1284         """We need the webpage for getting the captions url, pass it as an
1285            argument to speed up the process."""
1286         self.to_screen('%s: Looking for automatic captions' % video_id)
1287         player_config = self._get_ytplayer_config(video_id, webpage)
1288         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1289         if not player_config:
1290             self._downloader.report_warning(err_msg)
1291             return {}
1292         try:
1293             args = player_config['args']
1294             caption_url = args.get('ttsurl')
1295             if caption_url:
1296                 timestamp = args['timestamp']
1297                 # We get the available subtitles
1298                 list_params = compat_urllib_parse_urlencode({
1299                     'type': 'list',
1300                     'tlangs': 1,
1301                     'asrs': 1,
1302                 })
1303                 list_url = caption_url + '&' + list_params
1304                 caption_list = self._download_xml(list_url, video_id)
1305                 original_lang_node = caption_list.find('track')
1306                 if original_lang_node is None:
1307                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1308                     return {}
1309                 original_lang = original_lang_node.attrib['lang_code']
1310                 caption_kind = original_lang_node.attrib.get('kind', '')
1311
1312                 sub_lang_list = {}
1313                 for lang_node in caption_list.findall('target'):
1314                     sub_lang = lang_node.attrib['lang_code']
1315                     sub_formats = []
1316                     for ext in self._SUBTITLE_FORMATS:
1317                         params = compat_urllib_parse_urlencode({
1318                             'lang': original_lang,
1319                             'tlang': sub_lang,
1320                             'fmt': ext,
1321                             'ts': timestamp,
1322                             'kind': caption_kind,
1323                         })
1324                         sub_formats.append({
1325                             'url': caption_url + '&' + params,
1326                             'ext': ext,
1327                         })
1328                     sub_lang_list[sub_lang] = sub_formats
1329                 return sub_lang_list
1330
1331             def make_captions(sub_url, sub_langs):
1332                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1333                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1334                 captions = {}
1335                 for sub_lang in sub_langs:
1336                     sub_formats = []
1337                     for ext in self._SUBTITLE_FORMATS:
1338                         caption_qs.update({
1339                             'tlang': [sub_lang],
1340                             'fmt': [ext],
1341                         })
1342                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1343                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1344                         sub_formats.append({
1345                             'url': sub_url,
1346                             'ext': ext,
1347                         })
1348                     captions[sub_lang] = sub_formats
1349                 return captions
1350
1351             # New captions format as of 22.06.2017
1352             player_response = args.get('player_response')
1353             if player_response and isinstance(player_response, compat_str):
1354                 player_response = self._parse_json(
1355                     player_response, video_id, fatal=False)
1356                 if player_response:
1357                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1358                     base_url = renderer['captionTracks'][0]['baseUrl']
1359                     sub_lang_list = []
1360                     for lang in renderer['translationLanguages']:
1361                         lang_code = lang.get('languageCode')
1362                         if lang_code:
1363                             sub_lang_list.append(lang_code)
1364                     return make_captions(base_url, sub_lang_list)
1365
1366             # Some videos don't provide ttsurl but rather caption_tracks and
1367             # caption_translation_languages (e.g. 20LmZk1hakA)
1368             # Does not used anymore as of 22.06.2017
1369             caption_tracks = args['caption_tracks']
1370             caption_translation_languages = args['caption_translation_languages']
1371             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1372             sub_lang_list = []
1373             for lang in caption_translation_languages.split(','):
1374                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1375                 sub_lang = lang_qs.get('lc', [None])[0]
1376                 if sub_lang:
1377                     sub_lang_list.append(sub_lang)
1378             return make_captions(caption_url, sub_lang_list)
1379         # An extractor error can be raise by the download process if there are
1380         # no automatic captions but there are subtitles
1381         except (KeyError, IndexError, ExtractorError):
1382             self._downloader.report_warning(err_msg)
1383             return {}
1384
1385     def _mark_watched(self, video_id, video_info):
1386         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1387         if not playback_url:
1388             return
1389         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1390         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1391
1392         # cpn generation algorithm is reverse engineered from base.js.
1393         # In fact it works even with dummy cpn.
1394         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1395         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1396
1397         qs.update({
1398             'ver': ['2'],
1399             'cpn': [cpn],
1400         })
1401         playback_url = compat_urlparse.urlunparse(
1402             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1403
1404         self._download_webpage(
1405             playback_url, video_id, 'Marking watched',
1406             'Unable to mark watched', fatal=False)
1407
1408     @staticmethod
1409     def _extract_urls(webpage):
1410         # Embedded YouTube player
1411         entries = [
1412             unescapeHTML(mobj.group('url'))
1413             for mobj in re.finditer(r'''(?x)
1414             (?:
1415                 <iframe[^>]+?src=|
1416                 data-video-url=|
1417                 <embed[^>]+?src=|
1418                 embedSWF\(?:\s*|
1419                 <object[^>]+data=|
1420                 new\s+SWFObject\(
1421             )
1422             (["\'])
1423                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1424                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1425             \1''', webpage)]
1426
1427         # lazyYT YouTube embed
1428         entries.extend(list(map(
1429             unescapeHTML,
1430             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1431
1432         # Wordpress "YouTube Video Importer" plugin
1433         matches = re.findall(r'''(?x)<div[^>]+
1434             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1435             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1436         entries.extend(m[-1] for m in matches)
1437
1438         return entries
1439
1440     @staticmethod
1441     def _extract_url(webpage):
1442         urls = YoutubeIE._extract_urls(webpage)
1443         return urls[0] if urls else None
1444
1445     @classmethod
1446     def extract_id(cls, url):
1447         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1448         if mobj is None:
1449             raise ExtractorError('Invalid URL: %s' % url)
1450         video_id = mobj.group(2)
1451         return video_id
1452
1453     def _extract_annotations(self, video_id):
1454         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1455         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1456
1457     @staticmethod
1458     def _extract_chapters(description, duration):
1459         if not description:
1460             return None
1461         chapter_lines = re.findall(
1462             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1463             description)
1464         if not chapter_lines:
1465             return None
1466         chapters = []
1467         for next_num, (chapter_line, time_point) in enumerate(
1468                 chapter_lines, start=1):
1469             start_time = parse_duration(time_point)
1470             if start_time is None:
1471                 continue
1472             if start_time > duration:
1473                 break
1474             end_time = (duration if next_num == len(chapter_lines)
1475                         else parse_duration(chapter_lines[next_num][1]))
1476             if end_time is None:
1477                 continue
1478             if end_time > duration:
1479                 end_time = duration
1480             if start_time > end_time:
1481                 break
1482             chapter_title = re.sub(
1483                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1484             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1485             chapters.append({
1486                 'start_time': start_time,
1487                 'end_time': end_time,
1488                 'title': chapter_title,
1489             })
1490         return chapters
1491
1492     def _real_extract(self, url):
1493         url, smuggled_data = unsmuggle_url(url, {})
1494
1495         proto = (
1496             'http' if self._downloader.params.get('prefer_insecure', False)
1497             else 'https')
1498
1499         start_time = None
1500         end_time = None
1501         parsed_url = compat_urllib_parse_urlparse(url)
1502         for component in [parsed_url.fragment, parsed_url.query]:
1503             query = compat_parse_qs(component)
1504             if start_time is None and 't' in query:
1505                 start_time = parse_duration(query['t'][0])
1506             if start_time is None and 'start' in query:
1507                 start_time = parse_duration(query['start'][0])
1508             if end_time is None and 'end' in query:
1509                 end_time = parse_duration(query['end'][0])
1510
1511         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1512         mobj = re.search(self._NEXT_URL_RE, url)
1513         if mobj:
1514             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1515         video_id = self.extract_id(url)
1516
1517         # Get video webpage
1518         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1519         video_webpage = self._download_webpage(url, video_id)
1520
1521         # Attempt to extract SWF player URL
1522         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1523         if mobj is not None:
1524             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1525         else:
1526             player_url = None
1527
1528         dash_mpds = []
1529
1530         def add_dash_mpd(video_info):
1531             dash_mpd = video_info.get('dashmpd')
1532             if dash_mpd and dash_mpd[0] not in dash_mpds:
1533                 dash_mpds.append(dash_mpd[0])
1534
1535         is_live = None
1536         view_count = None
1537
1538         def extract_view_count(v_info):
1539             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1540
1541         # Get video info
1542         embed_webpage = None
1543         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1544             age_gate = True
1545             # We simulate the access to the video from www.youtube.com/v/{video_id}
1546             # this can be viewed without login into Youtube
1547             url = proto + '://www.youtube.com/embed/%s' % video_id
1548             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1549             data = compat_urllib_parse_urlencode({
1550                 'video_id': video_id,
1551                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1552                 'sts': self._search_regex(
1553                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1554             })
1555             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1556             video_info_webpage = self._download_webpage(
1557                 video_info_url, video_id,
1558                 note='Refetching age-gated info webpage',
1559                 errnote='unable to download video info webpage')
1560             video_info = compat_parse_qs(video_info_webpage)
1561             add_dash_mpd(video_info)
1562         else:
1563             age_gate = False
1564             video_info = None
1565             sts = None
1566             # Try looking directly into the video webpage
1567             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1568             if ytplayer_config:
1569                 args = ytplayer_config['args']
1570                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1571                     # Convert to the same format returned by compat_parse_qs
1572                     video_info = dict((k, [v]) for k, v in args.items())
1573                     add_dash_mpd(video_info)
1574                 # Rental video is not rented but preview is available (e.g.
1575                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1576                 # https://github.com/rg3/youtube-dl/issues/10532)
1577                 if not video_info and args.get('ypc_vid'):
1578                     return self.url_result(
1579                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1580                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1581                     is_live = True
1582                 sts = ytplayer_config.get('sts')
1583             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1584                 # We also try looking in get_video_info since it may contain different dashmpd
1585                 # URL that points to a DASH manifest with possibly different itag set (some itags
1586                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1587                 # manifest pointed by get_video_info's dashmpd).
1588                 # The general idea is to take a union of itags of both DASH manifests (for example
1589                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1590                 self.report_video_info_webpage_download(video_id)
1591                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1592                     query = {
1593                         'video_id': video_id,
1594                         'ps': 'default',
1595                         'eurl': '',
1596                         'gl': 'US',
1597                         'hl': 'en',
1598                     }
1599                     if el:
1600                         query['el'] = el
1601                     if sts:
1602                         query['sts'] = sts
1603                     video_info_webpage = self._download_webpage(
1604                         '%s://www.youtube.com/get_video_info' % proto,
1605                         video_id, note=False,
1606                         errnote='unable to download video info webpage',
1607                         fatal=False, query=query)
1608                     if not video_info_webpage:
1609                         continue
1610                     get_video_info = compat_parse_qs(video_info_webpage)
1611                     add_dash_mpd(get_video_info)
1612                     if view_count is None:
1613                         view_count = extract_view_count(get_video_info)
1614                     if not video_info:
1615                         video_info = get_video_info
1616                     if 'token' in get_video_info:
1617                         # Different get_video_info requests may report different results, e.g.
1618                         # some may report video unavailability, but some may serve it without
1619                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1620                         # the original webpage as well as el=info and el=embedded get_video_info
1621                         # requests report video unavailability due to geo restriction while
1622                         # el=detailpage succeeds and returns valid data). This is probably
1623                         # due to YouTube measures against IP ranges of hosting providers.
1624                         # Working around by preferring the first succeeded video_info containing
1625                         # the token if no such video_info yet was found.
1626                         if 'token' not in video_info:
1627                             video_info = get_video_info
1628                         break
1629
1630         def extract_unavailable_message():
1631             return self._html_search_regex(
1632                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1633                 video_webpage, 'unavailable message', default=None)
1634
1635         if 'token' not in video_info:
1636             if 'reason' in video_info:
1637                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1638                     regions_allowed = self._html_search_meta(
1639                         'regionsAllowed', video_webpage, default=None)
1640                     countries = regions_allowed.split(',') if regions_allowed else None
1641                     self.raise_geo_restricted(
1642                         msg=video_info['reason'][0], countries=countries)
1643                 reason = video_info['reason'][0]
1644                 if 'Invalid parameters' in reason:
1645                     unavailable_message = extract_unavailable_message()
1646                     if unavailable_message:
1647                         reason = unavailable_message
1648                 raise ExtractorError(
1649                     'YouTube said: %s' % reason,
1650                     expected=True, video_id=video_id)
1651             else:
1652                 raise ExtractorError(
1653                     '"token" parameter not in video info for unknown reason',
1654                     video_id=video_id)
1655
1656         # title
1657         if 'title' in video_info:
1658             video_title = video_info['title'][0]
1659         else:
1660             self._downloader.report_warning('Unable to extract video title')
1661             video_title = '_'
1662
1663         # description
1664         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1665         if video_description:
1666
1667             def replace_url(m):
1668                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1669                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1670                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1671                     qs = compat_parse_qs(parsed_redir_url.query)
1672                     q = qs.get('q')
1673                     if q and q[0]:
1674                         return q[0]
1675                 return redir_url
1676
1677             description_original = video_description = re.sub(r'''(?x)
1678                 <a\s+
1679                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1680                     (?:title|href)="([^"]+)"\s+
1681                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1682                     class="[^"]*"[^>]*>
1683                 [^<]+\.{3}\s*
1684                 </a>
1685             ''', replace_url, video_description)
1686             video_description = clean_html(video_description)
1687         else:
1688             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1689             if fd_mobj:
1690                 video_description = unescapeHTML(fd_mobj.group(1))
1691             else:
1692                 video_description = ''
1693
1694         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1695             if not self._downloader.params.get('noplaylist'):
1696                 entries = []
1697                 feed_ids = []
1698                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1699                 for feed in multifeed_metadata_list.split(','):
1700                     # Unquote should take place before split on comma (,) since textual
1701                     # fields may contain comma as well (see
1702                     # https://github.com/rg3/youtube-dl/issues/8536)
1703                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1704                     entries.append({
1705                         '_type': 'url_transparent',
1706                         'ie_key': 'Youtube',
1707                         'url': smuggle_url(
1708                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1709                             {'force_singlefeed': True}),
1710                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1711                     })
1712                     feed_ids.append(feed_data['id'][0])
1713                 self.to_screen(
1714                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1715                     % (', '.join(feed_ids), video_id))
1716                 return self.playlist_result(entries, video_id, video_title, video_description)
1717             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1718
1719         if view_count is None:
1720             view_count = extract_view_count(video_info)
1721
1722         # Check for "rental" videos
1723         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1724             raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1725
1726         def _extract_filesize(media_url):
1727             return int_or_none(self._search_regex(
1728                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1729
1730         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1731             self.report_rtmp_download()
1732             formats = [{
1733                 'format_id': '_rtmp',
1734                 'protocol': 'rtmp',
1735                 'url': video_info['conn'][0],
1736                 'player_url': player_url,
1737             }]
1738         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1739             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1740             if 'rtmpe%3Dyes' in encoded_url_map:
1741                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1742             formats_spec = {}
1743             fmt_list = video_info.get('fmt_list', [''])[0]
1744             if fmt_list:
1745                 for fmt in fmt_list.split(','):
1746                     spec = fmt.split('/')
1747                     if len(spec) > 1:
1748                         width_height = spec[1].split('x')
1749                         if len(width_height) == 2:
1750                             formats_spec[spec[0]] = {
1751                                 'resolution': spec[1],
1752                                 'width': int_or_none(width_height[0]),
1753                                 'height': int_or_none(width_height[1]),
1754                             }
1755             q = qualities(['small', 'medium', 'hd720'])
1756             formats = []
1757             for url_data_str in encoded_url_map.split(','):
1758                 url_data = compat_parse_qs(url_data_str)
1759                 if 'itag' not in url_data or 'url' not in url_data:
1760                     continue
1761                 format_id = url_data['itag'][0]
1762                 url = url_data['url'][0]
1763
1764                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1765                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1766                     jsplayer_url_json = self._search_regex(
1767                         ASSETS_RE,
1768                         embed_webpage if age_gate else video_webpage,
1769                         'JS player URL (1)', default=None)
1770                     if not jsplayer_url_json and not age_gate:
1771                         # We need the embed website after all
1772                         if embed_webpage is None:
1773                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1774                             embed_webpage = self._download_webpage(
1775                                 embed_url, video_id, 'Downloading embed webpage')
1776                         jsplayer_url_json = self._search_regex(
1777                             ASSETS_RE, embed_webpage, 'JS player URL')
1778
1779                     player_url = json.loads(jsplayer_url_json)
1780                     if player_url is None:
1781                         player_url_json = self._search_regex(
1782                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1783                             video_webpage, 'age gate player URL')
1784                         player_url = json.loads(player_url_json)
1785
1786                 if 'sig' in url_data:
1787                     url += '&signature=' + url_data['sig'][0]
1788                 elif 's' in url_data:
1789                     encrypted_sig = url_data['s'][0]
1790
1791                     if self._downloader.params.get('verbose'):
1792                         if player_url is None:
1793                             player_version = 'unknown'
1794                             player_desc = 'unknown'
1795                         else:
1796                             if player_url.endswith('swf'):
1797                                 player_version = self._search_regex(
1798                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1799                                     'flash player', fatal=False)
1800                                 player_desc = 'flash player %s' % player_version
1801                             else:
1802                                 player_version = self._search_regex(
1803                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1804                                      r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1805                                     player_url,
1806                                     'html5 player', fatal=False)
1807                                 player_desc = 'html5 player %s' % player_version
1808
1809                         parts_sizes = self._signature_cache_id(encrypted_sig)
1810                         self.to_screen('{%s} signature length %s, %s' %
1811                                        (format_id, parts_sizes, player_desc))
1812
1813                     signature = self._decrypt_signature(
1814                         encrypted_sig, video_id, player_url, age_gate)
1815                     url += '&signature=' + signature
1816                 if 'ratebypass' not in url:
1817                     url += '&ratebypass=yes'
1818
1819                 dct = {
1820                     'format_id': format_id,
1821                     'url': url,
1822                     'player_url': player_url,
1823                 }
1824                 if format_id in self._formats:
1825                     dct.update(self._formats[format_id])
1826                 if format_id in formats_spec:
1827                     dct.update(formats_spec[format_id])
1828
1829                 # Some itags are not included in DASH manifest thus corresponding formats will
1830                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1831                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1832                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1833                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1834
1835                 filesize = int_or_none(url_data.get(
1836                     'clen', [None])[0]) or _extract_filesize(url)
1837
1838                 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1839
1840                 more_fields = {
1841                     'filesize': filesize,
1842                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1843                     'width': width,
1844                     'height': height,
1845                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1846                     'format_note': quality,
1847                     'quality': q(quality),
1848                 }
1849                 for key, value in more_fields.items():
1850                     if value:
1851                         dct[key] = value
1852                 type_ = url_data.get('type', [None])[0]
1853                 if type_:
1854                     type_split = type_.split(';')
1855                     kind_ext = type_split[0].split('/')
1856                     if len(kind_ext) == 2:
1857                         kind, _ = kind_ext
1858                         dct['ext'] = mimetype2ext(type_split[0])
1859                         if kind in ('audio', 'video'):
1860                             codecs = None
1861                             for mobj in re.finditer(
1862                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1863                                 if mobj.group('key') == 'codecs':
1864                                     codecs = mobj.group('val')
1865                                     break
1866                             if codecs:
1867                                 dct.update(parse_codecs(codecs))
1868                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1869                     dct['downloader_options'] = {
1870                         # Youtube throttles chunks >~10M
1871                         'http_chunk_size': 10485760,
1872                     }
1873                 formats.append(dct)
1874         elif video_info.get('hlsvp'):
1875             manifest_url = video_info['hlsvp'][0]
1876             formats = []
1877             m3u8_formats = self._extract_m3u8_formats(
1878                 manifest_url, video_id, 'mp4', fatal=False)
1879             for a_format in m3u8_formats:
1880                 itag = self._search_regex(
1881                     r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1882                 if itag:
1883                     a_format['format_id'] = itag
1884                     if itag in self._formats:
1885                         dct = self._formats[itag].copy()
1886                         dct.update(a_format)
1887                         a_format = dct
1888                 a_format['player_url'] = player_url
1889                 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1890                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1891                 formats.append(a_format)
1892         else:
1893             error_message = clean_html(video_info.get('reason', [None])[0])
1894             if not error_message:
1895                 error_message = extract_unavailable_message()
1896             if error_message:
1897                 raise ExtractorError(error_message, expected=True)
1898             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1899
1900         # uploader
1901         video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1902         if video_uploader:
1903             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1904         else:
1905             self._downloader.report_warning('unable to extract uploader name')
1906
1907         # uploader_id
1908         video_uploader_id = None
1909         video_uploader_url = None
1910         mobj = re.search(
1911             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1912             video_webpage)
1913         if mobj is not None:
1914             video_uploader_id = mobj.group('uploader_id')
1915             video_uploader_url = mobj.group('uploader_url')
1916         else:
1917             self._downloader.report_warning('unable to extract uploader nickname')
1918
1919         channel_id = self._html_search_meta(
1920             'channelId', video_webpage, 'channel id')
1921         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
1922
1923         # thumbnail image
1924         # We try first to get a high quality image:
1925         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1926                             video_webpage, re.DOTALL)
1927         if m_thumb is not None:
1928             video_thumbnail = m_thumb.group(1)
1929         elif 'thumbnail_url' not in video_info:
1930             self._downloader.report_warning('unable to extract video thumbnail')
1931             video_thumbnail = None
1932         else:   # don't panic if we can't find it
1933             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1934
1935         # upload date
1936         upload_date = self._html_search_meta(
1937             'datePublished', video_webpage, 'upload date', default=None)
1938         if not upload_date:
1939             upload_date = self._search_regex(
1940                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1941                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1942                 video_webpage, 'upload date', default=None)
1943         upload_date = unified_strdate(upload_date)
1944
1945         video_license = self._html_search_regex(
1946             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1947             video_webpage, 'license', default=None)
1948
1949         m_music = re.search(
1950             r'''(?x)
1951                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1952                 <ul[^>]*>\s*
1953                 <li>(?P<title>.+?)
1954                 by (?P<creator>.+?)
1955                 (?:
1956                     \(.+?\)|
1957                     <a[^>]*
1958                         (?:
1959                             \bhref=["\']/red[^>]*>|             # drop possible
1960                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
1961                         )
1962                     .*?
1963                 )?</li
1964             ''',
1965             video_webpage)
1966         if m_music:
1967             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1968             video_creator = clean_html(m_music.group('creator'))
1969         else:
1970             video_alt_title = video_creator = None
1971
1972         def extract_meta(field):
1973             return self._html_search_regex(
1974                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1975                 video_webpage, field, default=None)
1976
1977         track = extract_meta('Song')
1978         artist = extract_meta('Artist')
1979
1980         m_episode = re.search(
1981             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1982             video_webpage)
1983         if m_episode:
1984             series = m_episode.group('series')
1985             season_number = int(m_episode.group('season'))
1986             episode_number = int(m_episode.group('episode'))
1987         else:
1988             series = season_number = episode_number = None
1989
1990         m_cat_container = self._search_regex(
1991             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1992             video_webpage, 'categories', default=None)
1993         if m_cat_container:
1994             category = self._html_search_regex(
1995                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1996                 default=None)
1997             video_categories = None if category is None else [category]
1998         else:
1999             video_categories = None
2000
2001         video_tags = [
2002             unescapeHTML(m.group('content'))
2003             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2004
2005         def _extract_count(count_name):
2006             return str_to_int(self._search_regex(
2007                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2008                 % re.escape(count_name),
2009                 video_webpage, count_name, default=None))
2010
2011         like_count = _extract_count('like')
2012         dislike_count = _extract_count('dislike')
2013
2014         # subtitles
2015         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2016         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2017
2018         video_duration = try_get(
2019             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2020         if not video_duration:
2021             video_duration = parse_duration(self._html_search_meta(
2022                 'duration', video_webpage, 'video duration'))
2023
2024         # annotations
2025         video_annotations = None
2026         if self._downloader.params.get('writeannotations', False):
2027             video_annotations = self._extract_annotations(video_id)
2028
2029         chapters = self._extract_chapters(description_original, video_duration)
2030
2031         # Look for the DASH manifest
2032         if self._downloader.params.get('youtube_include_dash_manifest', True):
2033             dash_mpd_fatal = True
2034             for mpd_url in dash_mpds:
2035                 dash_formats = {}
2036                 try:
2037                     def decrypt_sig(mobj):
2038                         s = mobj.group(1)
2039                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2040                         return '/signature/%s' % dec_s
2041
2042                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2043
2044                     for df in self._extract_mpd_formats(
2045                             mpd_url, video_id, fatal=dash_mpd_fatal,
2046                             formats_dict=self._formats):
2047                         if not df.get('filesize'):
2048                             df['filesize'] = _extract_filesize(df['url'])
2049                         # Do not overwrite DASH format found in some previous DASH manifest
2050                         if df['format_id'] not in dash_formats:
2051                             dash_formats[df['format_id']] = df
2052                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2053                         # allow them to fail without bug report message if we already have
2054                         # some DASH manifest succeeded. This is temporary workaround to reduce
2055                         # burst of bug reports until we figure out the reason and whether it
2056                         # can be fixed at all.
2057                         dash_mpd_fatal = False
2058                 except (ExtractorError, KeyError) as e:
2059                     self.report_warning(
2060                         'Skipping DASH manifest: %r' % e, video_id)
2061                 if dash_formats:
2062                     # Remove the formats we found through non-DASH, they
2063                     # contain less info and it can be wrong, because we use
2064                     # fixed values (for example the resolution). See
2065                     # https://github.com/rg3/youtube-dl/issues/5774 for an
2066                     # example.
2067                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2068                     formats.extend(dash_formats.values())
2069
2070         # Check for malformed aspect ratio
2071         stretched_m = re.search(
2072             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2073             video_webpage)
2074         if stretched_m:
2075             w = float(stretched_m.group('w'))
2076             h = float(stretched_m.group('h'))
2077             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2078             # We will only process correct ratios.
2079             if w > 0 and h > 0:
2080                 ratio = w / h
2081                 for f in formats:
2082                     if f.get('vcodec') != 'none':
2083                         f['stretched_ratio'] = ratio
2084
2085         self._sort_formats(formats)
2086
2087         self.mark_watched(video_id, video_info)
2088
2089         return {
2090             'id': video_id,
2091             'uploader': video_uploader,
2092             'uploader_id': video_uploader_id,
2093             'uploader_url': video_uploader_url,
2094             'channel_id': channel_id,
2095             'channel_url': channel_url,
2096             'upload_date': upload_date,
2097             'license': video_license,
2098             'creator': video_creator or artist,
2099             'title': video_title,
2100             'alt_title': video_alt_title or track,
2101             'thumbnail': video_thumbnail,
2102             'description': video_description,
2103             'categories': video_categories,
2104             'tags': video_tags,
2105             'subtitles': video_subtitles,
2106             'automatic_captions': automatic_captions,
2107             'duration': video_duration,
2108             'age_limit': 18 if age_gate else 0,
2109             'annotations': video_annotations,
2110             'chapters': chapters,
2111             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2112             'view_count': view_count,
2113             'like_count': like_count,
2114             'dislike_count': dislike_count,
2115             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2116             'formats': formats,
2117             'is_live': is_live,
2118             'start_time': start_time,
2119             'end_time': end_time,
2120             'series': series,
2121             'season_number': season_number,
2122             'episode_number': episode_number,
2123             'track': track,
2124             'artist': artist,
2125         }
2126
2127
2128 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    # Description string for this extractor.
    IE_DESC = 'YouTube.com playlists'
    # Verbose-mode pattern with two top-level alternatives:
    #   * capture group 1 - the playlist id taken from the p=/a=/list= query
    #     parameter of a youtube.com URL (course, playlist, watch, embed, ...),
    #     from a youtube.com/p/<id> path, or from the list= parameter of a
    #     youtu.be short link;
    #   * capture group 2 - a bare playlist id matching
    #     YoutubeBaseInfoExtractor._PLAYLIST_ID_RE.
    # _real_extract reads the playlist id as group 1, falling back to group 2.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    # Playlist page URL; %s is filled with the playlist id in _extract_playlist.
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Matches a watch link inside a playlist page and captures the 11-character
    # video id, the index= value and, optionally, the link text as title.
    # NOTE(review): not referenced in this section; presumably consumed by the
    # base class when it parses playlist pages - confirm.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    # Name string for this extractor.
    IE_NAME = 'youtube:playlist'
    # Test cases for this extractor.  Each entry gives a 'url' together with
    # either the expected result ('info_dict', 'playlist_count',
    # 'playlist_mincount') or an 'only_matching' flag; some entries carry a
    # 'note', a 'skip' reason or extra 'params'.
    # NOTE(review): presumably consumed by the project's test runner - confirm.
    _TESTS = [{
        # Regular playlist page URL
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        # Empty playlist (skipped, see the 'skip' reason)
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        # Bare playlist id instead of a URL
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        # Bare playlist id (skipped, see the 'skip' reason)
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        # Embed URL of a single video that also carries a list= parameter
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        # youtu.be short link with a list= parameter, run with 'noplaylist'
        # so that a single video is expected
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # youtu.be short link with a list= parameter
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        # Bare id with the TL prefix
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]
2286
    def _real_initialize(self):
        """Initialization hook of this extractor; it only delegates to ``self._login()``."""
        # NOTE(review): _login is defined outside this section, presumably on
        # the YouTube base extractor - confirm.
        self._login()
2289
2290     def _extract_mix(self, playlist_id):
2291         # The mixes are generated from a single video
2292         # the id of the playlist is just 'RD' + video_id
2293         ids = []
2294         last_id = playlist_id[-11:]
2295         for n in itertools.count(1):
2296             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2297             webpage = self._download_webpage(
2298                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2299             new_ids = orderedSet(re.findall(
2300                 r'''(?xs)data-video-username=".*?".*?
2301                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2302                 webpage))
2303             # Fetch new pages until all the videos are repeated, it seems that
2304             # there are always 51 unique videos.
2305             new_ids = [_id for _id in new_ids if _id not in ids]
2306             if not new_ids:
2307                 break
2308             ids.extend(new_ids)
2309             last_id = ids[-1]
2310
2311         url_results = self._ids_to_results(ids)
2312
2313         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2314         title_span = (
2315             search_title('playlist-title') or
2316             search_title('title long-title') or
2317             search_title('title'))
2318         title = clean_html(title_span)
2319
2320         return self.playlist_result(url_results, playlist_id, title)
2321
2322     def _extract_playlist(self, playlist_id):
2323         url = self._TEMPLATE_URL % playlist_id
2324         page = self._download_webpage(url, playlist_id)
2325
2326         # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
2327         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2328             match = match.strip()
2329             # Check if the playlist exists or is private
2330             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2331             if mobj:
2332                 reason = mobj.group('reason')
2333                 message = 'This playlist %s' % reason
2334                 if 'private' in reason:
2335                     message += ', use --username or --netrc to access it'
2336                 message += '.'
2337                 raise ExtractorError(message, expected=True)
2338             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2339                 raise ExtractorError(
2340                     'Invalid parameters. Maybe URL is incorrect.',
2341                     expected=True)
2342             elif re.match(r'[^<]*Choose your language[^<]*', match):
2343                 continue
2344             else:
2345                 self.report_warning('Youtube gives an alert message: ' + match)
2346
2347         playlist_title = self._html_search_regex(
2348             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2349             page, 'title', default=None)
2350
2351         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2352         uploader = self._search_regex(
2353             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2354             page, 'uploader', default=None)
2355         mobj = re.search(
2356             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2357             page)
2358         if mobj:
2359             uploader_id = mobj.group('uploader_id')
2360             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2361         else:
2362             uploader_id = uploader_url = None
2363
2364         has_videos = True
2365
2366         if not playlist_title:
2367             try:
2368                 # Some playlist URLs don't actually serve a playlist (e.g.
2369                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2370                 next(self._entries(page, playlist_id))
2371             except StopIteration:
2372                 has_videos = False
2373
2374         playlist = self.playlist_result(
2375             self._entries(page, playlist_id), playlist_id, playlist_title)
2376         playlist.update({
2377             'uploader': uploader,
2378             'uploader_id': uploader_id,
2379             'uploader_url': uploader_url,
2380         })
2381
2382         return has_videos, playlist
2383
2384     def _check_download_just_video(self, url, playlist_id):
2385         # Check if it's a video-specific URL
2386         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2387         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2388             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2389             'video id', default=None)
2390         if video_id:
2391             if self._downloader.params.get('noplaylist'):
2392                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2393                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2394             else:
2395                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2396                 return video_id, None
2397         return None, None
2398
2399     def _real_extract(self, url):
2400         # Extract playlist id
2401         mobj = re.match(self._VALID_URL, url)
2402         if mobj is None:
2403             raise ExtractorError('Invalid URL: %s' % url)
2404         playlist_id = mobj.group(1) or mobj.group(2)
2405
2406         video_id, video = self._check_download_just_video(url, playlist_id)
2407         if video:
2408             return video
2409
2410         if playlist_id.startswith(('RD', 'UL', 'PU')):
2411             # Mixes require a custom extraction process
2412             return self._extract_mix(playlist_id)
2413
2414         has_videos, playlist = self._extract_playlist(playlist_id)
2415         if has_videos or not video_id:
2416             return playlist
2417
2418         # Some playlist URLs don't actually serve a playlist (see
2419         # https://github.com/rg3/youtube-dl/issues/10537).
2420         # Fallback to plain video extraction if there is a video id
2421         # along with playlist id.
2422         return self.url_result(video_id, 'Youtube', video_id=video_id)
2423
2424
2425 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    # Description string for this extractor.
    IE_DESC = 'YouTube.com channels'
    # Matches .../channel/<id> on youtu.be, youtube.com (any subdomain,
    # optionally the -nocookie variant) and invidio.us; the channel id is
    # captured in the 'id' group.
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    # URL of the channel's videos page; %s is filled with the channel id in
    # _build_template_url.
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    # Matches a watch link and captures the video id plus, when a title
    # attribute precedes the href, the title.
    # NOTE(review): not referenced in this section; presumably consumed by
    # extract_videos_from_page in the base class - confirm.
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    # Name string for this extractor.
    IE_NAME = 'youtube:channel'
    # Test cases; the expected ids start with 'UU' because _real_extract
    # redirects 'UC...' channel ids to the matching 'UU...' playlist.
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        # invidio.us channel URL
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]
2452
2453     @classmethod
2454     def suitable(cls, url):
2455         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2456                 else super(YoutubeChannelIE, cls).suitable(url))
2457
2458     def _build_template_url(self, url, channel_id):
2459         return self._TEMPLATE_URL % channel_id
2460
2461     def _real_extract(self, url):
2462         channel_id = self._match_id(url)
2463
2464         url = self._build_template_url(url, channel_id)
2465
2466         # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2467         # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2468         # otherwise fallback on channel by page extraction
2469         channel_page = self._download_webpage(
2470             url + '?view=57', channel_id,
2471             'Downloading channel page', fatal=False)
2472         if channel_page is False:
2473             channel_playlist_id = False
2474         else:
2475             channel_playlist_id = self._html_search_meta(
2476                 'channelId', channel_page, 'channel id', default=None)
2477             if not channel_playlist_id:
2478                 channel_url = self._html_search_meta(
2479                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2480                     channel_page, 'channel url', default=None)
2481                 if channel_url:
2482                     channel_playlist_id = self._search_regex(
2483                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2484                         channel_url, 'channel id', default=None)
2485         if channel_playlist_id and channel_playlist_id.startswith('UC'):
2486             playlist_id = 'UU' + channel_playlist_id[2:]
2487             return self.url_result(
2488                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2489
2490         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2491         autogenerated = re.search(r'''(?x)
2492                 class="[^"]*?(?:
2493                     channel-header-autogenerated-label|
2494                     yt-channel-title-autogenerated
2495                 )[^"]*"''', channel_page) is not None
2496
2497         if autogenerated:
2498             # The videos are contained in a single page
2499             # the ajax pages can't be used, they are empty
2500             entries = [
2501                 self.url_result(
2502                     video_id, 'Youtube', video_id=video_id,
2503                     video_title=video_title)
2504                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2505             return self.playlist_result(entries, channel_id)
2506
2507         try:
2508             next(self._entries(channel_page, channel_id))
2509         except StopIteration:
2510             alert_message = self._html_search_regex(
2511                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2512                 channel_page, 'alert', default=None, group='alert')
2513             if alert_message:
2514                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2515
2516         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2517
2518
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages: /user/<name>, /c/<name>, bare /<name> and
    the "ytuser:<name>" keyword."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other Youtube*IE in this module claims url."""
        # The regex of this extractor is too permissive and would also match
        # URLs that another youtube extractor can handle, so give every other
        # module-level class named Youtube...IE the first chance.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the /videos listing URL for the user matched in url.

        The path prefix is the 'user' group captured by _VALID_URL ('user' or
        'c'); 'user' is used when the URL carried no such prefix.
        """
        match = re.match(self._VALID_URL, url)
        path_kind = match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, match.group('id'))
2569
2570
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for the /live URL of a user or channel."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to a video, or to the URL without '/live'.

        The page is treated as a video when its og:type starts with 'video'
        and its 'videoId' meta tag holds an 11-character id. In every other
        case, including a failed download, the base URL is returned instead.
        """
        match = re.match(self._VALID_URL, url)
        channel_id, base_url = match.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2621
2622
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists page of a user or channel.

    Only the URL pattern and metadata are declared here; no extraction
    method is defined in this class, so the logic comes from the base class.
    """
    IE_DESC = 'YouTube.com user/channel playlists'
    # The captured id is the user name or channel id preceding /playlists
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2651
2652
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Base class for the search extractors; it only overrides _VIDEO_RE."""
    # Matches an href to /watch?v=<11-character id> and, when present later
    # in the same tag, captures the value of its title attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2655
2656
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for keyword searches using the "ytsearch" prefix."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra parameters merged into the query string of the first results URL
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link of each page, until n videos are collected, a page yields no
        video, or there is no next page.

        Returns a playlist_result holding at most n entries, with the query
        as playlist id. Raises ExtractorError when a page contains a search
        message block instead of results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are available: with a strict
            # '>' an extra page was requested when exactly n were collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # n may be float('inf') (see _MAX_RESULTS), which is not a valid
        # slice bound, so only slice when there is something to cut.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2705
2706
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE using the "ytsearchdate" keyword.

    It differs from the parent only in its search key, its name/description
    and the extra sort parameter added to the search query.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Merged into the results URL query by YoutubeSearchIE._get_n_results
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2712
2713
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for /results URLs carrying a search_query or q parameter."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos found on the results page as a playlist.

        The playlist title is the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
2734
2735
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for /show/<id> URLs."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Run the base class extraction on the show's /playlists page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2753
2754
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name: 'youtube:<_FEED_NAME>'."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before extraction."""
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video linked from the feed.

        page is the HTML of the first feed page. Further portions are fetched
        through the "load more" link for as long as one is present and the
        last portion contained at least one video not seen before.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" test constant-time; the yield order
        # is still taken from the order of the ids on the page.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist.

        The url argument is not used; the feed address is built from
        _FEED_NAME and the playlist is titled with _PLAYLIST_TITLE.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2806
2807
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list (playlist id 'WL')."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one is selected, else the 'WL' playlist."""
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, whole_playlist = self._extract_playlist('WL')
            return whole_playlist
        return single_video
2827
2828
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites page and the ":ytfav" keyword."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to YoutubePlaylist.

        The url argument is not used; the my_favorites page is always
        downloaded and searched for the first 'list=' parameter.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
2839
2840
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/recommended and the ":ytrec" keyword."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Read by YoutubeFeedsInfoExtractor to build the feed URL and IE_NAME
    _FEED_NAME = 'recommended'
    # Used by YoutubeFeedsInfoExtractor as the title of the resulting playlist
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
2846
2847
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/subscriptions and the ":ytsubs" keyword."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Read by YoutubeFeedsInfoExtractor to build the feed URL and IE_NAME
    _FEED_NAME = 'subscriptions'
    # Used by YoutubeFeedsInfoExtractor as the title of the resulting playlist
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
2853
2854
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/history and the ":ythistory" keyword."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    # Read by YoutubeFeedsInfoExtractor to build the feed URL and IE_NAME
    _FEED_NAME = 'history'
    # Used by YoutubeFeedsInfoExtractor as the title of the resulting playlist
    _PLAYLIST_TITLE = 'Youtube History'
2860
2861
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution_link URLs that lack a video id.

    Extraction always fails with a hint about quoting the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
2909
2910
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose v= parameter has only 1 to 10 characters.

    Extraction always fails with a message reporting the incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id and URL."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)