]> jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/youtube.py
[youtube] Fix 83 signature (Closes #1511)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20 compat_chr,
21 compat_http_client,
22 compat_parse_qs,
23 compat_urllib_error,
24 compat_urllib_parse,
25 compat_urllib_request,
26 compat_str,
27
28 clean_html,
29 get_element_by_id,
30 ExtractorError,
31 unescapeHTML,
32 unified_strdate,
33 orderedSet,
34 write_json_file,
35 )
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request the language-setting URL so YouTube replies in English.

        Returns True on success, False (after a warning) on network errors.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in to YouTube with the configured credentials.

        Returns True if login succeeded, False otherwise (no credentials
        configured, network error, or rejected username/password).  Raises
        ExtractorError when _LOGIN_REQUIRED is set but no login info is
        available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # The login form carries two hidden anti-forgery tokens (GALX, dsh)
        # that must be echoed back together with the credentials.
        galx = None
        dsh = None
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            galx = match.group(1)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'dsh': dsh,
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Being served the login form again means the credentials were
            # not accepted.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Submit the age-confirmation form; raises ExtractorError on failure."""
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        # Language, login and age confirmation are chained: each step only
        # runs when the previous one succeeded (login is skipped silently
        # when no credentials are configured).
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
143
144
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Verbose regex: matches watch/embed/short URLs in all their variants,
    # or a naked 11-character video ID.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags, but free (webm) formats ranked before non-free ones of
    # comparable quality.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container name -> itags using that container, best first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> file extension.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> human-readable quality: "HxW" pixel dimensions, a "<N>p"
    # resolution label, or an audio bitrate for audio-only itags.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> extra note shown in format listings (3D / DASH stream type).
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }

    IE_NAME = u'youtube'
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
            u'info_dict': {
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]
397
398
399 @classmethod
400 def suitable(cls, url):
401 """Receives a URL and returns True if suitable for this IE."""
402 if YoutubePlaylistIE.suitable(url): return False
403 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
404
405 def __init__(self, *args, **kwargs):
406 super(YoutubeIE, self).__init__(*args, **kwargs)
407 self._player_cache = {}
408
409 def report_video_webpage_download(self, video_id):
410 """Report attempt to download video webpage."""
411 self.to_screen(u'%s: Downloading video webpage' % video_id)
412
413 def report_video_info_webpage_download(self, video_id):
414 """Report attempt to download video info webpage."""
415 self.to_screen(u'%s: Downloading video info webpage' % video_id)
416
417 def report_information_extraction(self, video_id):
418 """Report attempt to extract video information."""
419 self.to_screen(u'%s: Extracting video information' % video_id)
420
421 def report_unavailable_format(self, video_id, format):
422 """Report extracted video URL."""
423 self.to_screen(u'%s: Format %s not available' % (video_id, format))
424
425 def report_rtmp_download(self):
426 """Indicate the download will use the RTMP protocol."""
427 self.to_screen(u'RTMP download detected')
428
    def _extract_signature_function(self, video_id, player_url, slen):
        """Download the player and extract its signature-decryption function.

        Returns a callable mapping a scrambled signature string of length
        *slen* to the working signature.  Results are cached on disk (as the
        index permutation) keyed by player type, player id and *slen*.
        """
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        assert os.path.basename(func_id) == func_id
        cache_dir = self._downloader.params.get('cachedir',
                                                u'~/.youtube-dl/cache')

        # cachedir can be explicitly set to None to disable caching.
        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cache stores the permutation as a list of indices.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            # Derive the permutation by running the function over a probe
            # string, then persist it; failures here are only warnings.
            try:
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
486
    def _print_sig_code(self, func, slen):
        """Print Python source equivalent to the extracted signature function.

        Runs *func* over a probe string of length *slen* and compresses the
        resulting index permutation into slice expressions, producing code
        that can be pasted into _static_decrypt_signature.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a contiguous run of indices as a Python slice.
                starts = u'' if start == 0 else str(start)
                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
                steps = u'' if step == 1 else (u':%d' % step)
                return u's[%s%s%s]' % (starts, ends, steps)

            step = None
            start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                    # set as soon as step is set
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: either extend it or flush it as a slice.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Adjacent indices start a new ascending/descending run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield u's[%d]' % prev
            # Flush whatever the final element belongs to.
            if step is None:
                yield u's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = u''.join(map(compat_chr, range(slen)))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
        code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)
522
    def _parse_sig_js(self, jscode):
        """Build a Python callable replaying the JS signature routine.

        Locates the player's signature function by name and interprets it
        with a tiny JS interpreter that supports only the statement and
        expression forms observed in YouTube player code.
        """
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
            u'Initial JS player signature function name')

        # Cache of already-extracted helper functions, by name.
        functions = {}

        def argidx(varname):
            # Map a single-letter argument name to its alphabet position.
            # NOTE(review): Python 2 only (string.lowercase); appears unused.
            return string.lowercase.index(varname)

        def interpret_statement(stmt, local_vars, allow_recursion=20):
            if allow_recursion < 0:
                raise ExtractorError(u'Recursion limit reached')

            if stmt.startswith(u'var '):
                stmt = stmt[len(u'var '):]
            # Assignment: either "x=expr" or "x[index]=expr".
            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                             r'=(?P<expr>.*)$', stmt)
            if ass_m:
                if ass_m.groupdict().get('index'):
                    def assign(val):
                        lvar = local_vars[ass_m.group('out')]
                        idx = interpret_expression(ass_m.group('index'),
                                                   local_vars, allow_recursion)
                        assert isinstance(idx, int)
                        lvar[idx] = val
                        return val
                    expr = ass_m.group('expr')
                else:
                    def assign(val):
                        local_vars[ass_m.group('out')] = val
                        return val
                    expr = ass_m.group('expr')
            elif stmt.startswith(u'return '):
                assign = lambda v: v
                expr = stmt[len(u'return '):]
            else:
                raise ExtractorError(
                    u'Cannot determine left side of statement in %r' % stmt)

            v = interpret_expression(expr, local_vars, allow_recursion)
            return assign(v)

        def interpret_expression(expr, local_vars, allow_recursion):
            # Integer literal
            if expr.isdigit():
                return int(expr)

            # Bare variable reference
            if expr.isalpha():
                return local_vars[expr]

            # Member access / method call: x.member
            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
            if m:
                member = m.group('member')
                val = local_vars[m.group('in')]
                if member == 'split("")':
                    return list(val)
                if member == 'join("")':
                    return u''.join(val)
                if member == 'length':
                    return len(val)
                if member == 'reverse()':
                    return val[::-1]
                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
                if slice_m:
                    idx = interpret_expression(
                        slice_m.group('idx'), local_vars, allow_recursion-1)
                    return val[idx:]

            # Indexing: x[idx]
            m = re.match(
                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
            if m:
                val = local_vars[m.group('in')]
                idx = interpret_expression(m.group('idx'), local_vars,
                                           allow_recursion-1)
                return val[idx]

            # Modulo: a%b
            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
            if m:
                a = interpret_expression(m.group('a'),
                                         local_vars, allow_recursion)
                b = interpret_expression(m.group('b'),
                                         local_vars, allow_recursion)
                return a % b

            # Function call: f(arg1,arg2,...) — extract callee on demand.
            m = re.match(
                r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
            if m:
                fname = m.group('func')
                if fname not in functions:
                    functions[fname] = extract_function(fname)
                argvals = [int(v) if v.isdigit() else local_vars[v]
                           for v in m.group('args').split(',')]
                return functions[fname](argvals)
            raise ExtractorError(u'Unsupported JS expression %r' % expr)

        def extract_function(funcname):
            # Find "function NAME(args){code}" and wrap its body in a
            # Python callable that interprets it statement by statement.
            func_m = re.search(
                r'function ' + re.escape(funcname) +
                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
                jscode)
            argnames = func_m.group('args').split(',')

            def resf(args):
                local_vars = dict(zip(argnames, args))
                for stmt in func_m.group('code').split(';'):
                    res = interpret_statement(stmt, local_vars)
                # The value of the last statement (the return) is the result.
                return res
            return resf

        initial_function = extract_function(funcname)
        return lambda s: initial_function([s])
634
    def _parse_sig_swf(self, file_contents):
        """Build a Python callable from the 'decipher' method of an SWF player.

        Decompresses the SWF, finds its DoABC tag, parses the ABC (AVM2
        bytecode) constant pool, locates the SignatureDecipher class and
        interprets just enough bytecode to replay its methods.
        """
        if file_contents[1:3] != b'WS':
            raise ExtractorError(
                u'Not an SWF file; header is %r' % file_contents[:3])
        if file_contents[:1] == b'C':
            # 'CWS' signature: body after the 8-byte header is zlib-compressed.
            content = zlib.decompress(file_contents[8:])
        else:
            raise NotImplementedError(u'Unsupported compression format %r' %
                                      file_contents[:1])

        def extract_tags(content):
            # Yield (tag_code, tag_body) pairs from the SWF tag stream.
            pos = 0
            while pos < len(content):
                header16 = struct.unpack('<H', content[pos:pos+2])[0]
                pos += 2
                tag_code = header16 >> 6
                tag_len = header16 & 0x3f
                if tag_len == 0x3f:
                    # Long tag: the real length follows in 4 extra bytes.
                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
                    pos += 4
                assert pos+tag_len <= len(content)
                yield (tag_code, content[pos:pos+tag_len])
                pos += tag_len

        # Tag code 82 is DoABC (the ActionScript bytecode container).
        code_tag = next(tag
                        for tag_code, tag in extract_tags(content)
                        if tag_code == 82)
        # Skip the 4-byte flags and the NUL-terminated tag name.
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)
        def read_int(reader=None):
            # Variable-length unsigned integer, 7 bits per byte, max 5 bytes.
            if reader is None:
                reader = code_reader
            res = 0
            shift = 0
            for _ in range(5):
                buf = reader.read(1)
                assert len(buf) == 1
                b = struct.unpack('<B', buf)[0]
                res = res | ((b & 0x7f) << shift)
                if b & 0x80 == 0:
                    break
                shift += 7
            return res

        def u30(reader=None):
            # u30: like u32 but the top two bits must be clear.
            res = read_int(reader)
            assert res & 0xf0000000 == 0
            return res
        u32 = read_int

        def s32(reader=None):
            # Signed 32-bit variant (two's complement of the u32 reading).
            v = read_int(reader)
            if v & 0x80000000 != 0:
                v = - ((v ^ 0xffffffff) + 1)
            return v

        def read_string(reader=None):
            # Length-prefixed UTF-8 string.
            if reader is None:
                reader = code_reader
            slen = u30(reader)
            resb = reader.read(slen)
            assert len(resb) == slen
            return resb.decode('utf-8')

        def read_bytes(count, reader=None):
            if reader is None:
                reader = code_reader
            resb = reader.read(count)
            assert len(resb) == count
            return resb

        def read_byte(reader=None):
            resb = read_bytes(1, reader=reader)
            res = struct.unpack('<B', resb)[0]
            return res

        # minor_version + major_version
        read_bytes(2 + 2)

        # Constant pool — counts are 1-based, entry 0 is implicit.
        int_count = u30()
        for _c in range(1, int_count):
            s32()
        uint_count = u30()
        for _c in range(1, uint_count):
            u32()
        double_count = u30()
        read_bytes((double_count-1) * 8)
        string_count = u30()
        constant_strings = [u'']
        for _c in range(1, string_count):
            s = read_string()
            constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            read_bytes(1)  # kind
            u30()  # name
        ns_set_count = u30()
        for _c in range(1, ns_set_count):
            count = u30()
            for _c2 in range(count):
                u30()
        multiname_count = u30()
        # Number of u30 fields following each multiname kind byte.
        MULTINAME_SIZES = {
            0x07: 2,  # QName
            0x0d: 2,  # QNameA
            0x0f: 1,  # RTQName
            0x10: 1,  # RTQNameA
            0x11: 0,  # RTQNameL
            0x12: 0,  # RTQNameLA
            0x09: 2,  # Multiname
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        }
        multinames = [u'']
        for _c in range(1, multiname_count):
            kind = u30()
            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
            if kind == 0x07:
                # Only QNames carry a name we care about.
                u30()  # namespace_idx
                name_idx = u30()
                multinames.append(constant_strings[name_idx])
            else:
                multinames.append('[MULTINAME kind: %d]' % kind)
                for _c2 in range(MULTINAME_SIZES[kind]):
                    u30()

        # Methods
        method_count = u30()
        MethodInfo = collections.namedtuple(
            'MethodInfo',
            ['NEED_ARGUMENTS', 'NEED_REST'])
        method_infos = []
        for method_id in range(method_count):
            param_count = u30()
            u30()  # return type
            for _ in range(param_count):
                u30()  # param type
            u30()  # name index (always 0 for youtube)
            flags = read_byte()
            if flags & 0x08 != 0:
                # Options present
                option_count = u30()
                for c in range(option_count):
                    u30()  # val
                    read_bytes(1)  # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    u30()  # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata
        metadata_count = u30()
        for _c in range(metadata_count):
            u30()  # name
            item_count = u30()
            for _c2 in range(item_count):
                u30()  # key
                u30()  # value

        def parse_traits_info():
            # Consume one trait entry; return {method name: method index}
            # for method-like traits, empty dict otherwise.
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods

        # Classes — first pass over instance infos to find our target class.
        TARGET_CLASSNAME = u'SignatureDecipher'
        searched_idx = multinames.index(TARGET_CLASSNAME)
        searched_class_id = None
        class_count = u30()
        for class_id in range(class_count):
            name_idx = u30()
            if name_idx == searched_idx:
                # We found the class we're looking for!
                searched_class_id = class_id
            u30()  # super_name idx
            flags = read_byte()
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            intrf_count = u30()
            for _c2 in range(intrf_count):
                u30()
            u30()  # iinit
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        if searched_class_id is None:
            raise ExtractorError(u'Target class %r not found' %
                                 TARGET_CLASSNAME)

        # Second pass over class infos: collect the target class's methods.
        method_names = {}
        method_idxs = {}
        for class_id in range(class_count):
            u30()  # cinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                if class_id == searched_class_id:
                    method_names.update(trait_methods.items())
                    method_idxs.update(dict(
                        (idx, name)
                        for name, idx in trait_methods.items()))

        # Scripts
        script_count = u30()
        for _c in range(script_count):
            u30()  # init
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Method bodies — keep the bytecode of the methods we collected.
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        methods = {}
        for _c in range(method_body_count):
            method_idx = u30()
            u30()  # max_stack
            local_count = u30()
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code_length = u30()
            code = read_bytes(code_length)
            if method_idx in method_idxs:
                m = Method(code, local_count)
                methods[method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
                u30()  # from
                u30()  # to
                u30()  # target
                u30()  # exc_type
                u30()  # var_name
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Sanity checks: the whole tag must be consumed and every collected
        # method index must have a body.
        assert p + code_reader.tell() == len(code_tag)
        assert len(methods) == len(method_idxs)

        method_pyfunctions = {}

        def extract_function(func_name):
            # Lazily (and memoized) turn the named ABC method into a Python
            # function by interpreting its bytecode on each call.
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                # Register 0 is 'this'; then the arguments; then locals.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc

        initial_function = extract_function(u'decipher')
        return lambda s: initial_function([s])
1048
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is not None:
            try:
                # Extracted functions are cached per player URL, since all
                # videos served by the same player use the same algorithm.
                if player_url not in self._player_cache:
                    func = self._extract_signature_function(
                        video_id, player_url, len(s)
                    )
                    self._player_cache[player_url] = func
                func = self._player_cache[player_url]
                if self._downloader.params.get('youtube_print_sig_code'):
                    self._print_sig_code(func, len(s))
                return func(s)
            except Exception:
                # Any failure in automatic extraction falls through to the
                # static algorithm below (after warning with the traceback).
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Automatic signature extraction failed: ' + tb)

            self._downloader.report_warning(
                u'Warning: Falling back to static signature algorithm')

        return self._static_decrypt_signature(
            s, video_id, player_url, age_gate)
1073
1074 def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1075 if age_gate:
1076 # The videos with age protection use another player, so the
1077 # algorithms can be different.
1078 if len(s) == 86:
1079 return s[2:63] + s[82] + s[64:82] + s[63]
1080
1081 if len(s) == 93:
1082 return s[86:29:-1] + s[88] + s[28:5:-1]
1083 elif len(s) == 92:
1084 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1085 elif len(s) == 91:
1086 return s[84:27:-1] + s[86] + s[26:5:-1]
1087 elif len(s) == 90:
1088 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1089 elif len(s) == 89:
1090 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1091 elif len(s) == 88:
1092 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1093 elif len(s) == 87:
1094 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1095 elif len(s) == 86:
1096 return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1097 elif len(s) == 85:
1098 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1099 elif len(s) == 84:
1100 return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1101 elif len(s) == 83:
1102 return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1103 elif len(s) == 82:
1104 return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1105 elif len(s) == 81:
1106 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1107 elif len(s) == 80:
1108 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1109 elif len(s) == 79:
1110 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1111
1112 else:
1113 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1114
1115 def _get_available_subtitles(self, video_id):
1116 try:
1117 sub_list = self._download_webpage(
1118 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1119 video_id, note=False)
1120 except ExtractorError as err:
1121 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1122 return {}
1123 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1124
1125 sub_lang_list = {}
1126 for l in lang_list:
1127 lang = l[1]
1128 params = compat_urllib_parse.urlencode({
1129 'lang': lang,
1130 'v': video_id,
1131 'fmt': self._downloader.params.get('subtitlesformat'),
1132 })
1133 url = u'http://www.youtube.com/api/timedtext?' + params
1134 sub_lang_list[lang] = url
1135 if not sub_lang_list:
1136 self._downloader.report_warning(u'video doesn\'t have subtitles')
1137 return {}
1138 return sub_lang_list
1139
    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns a dict mapping target language codes to caption URLs, or an
        empty dict (after a warning) when no automatic captions exist.
        """
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            original_lang_node = caption_list.find('track')
            # Only an 'asr' track counts as an automatic caption here.
            if original_lang_node.attrib.get('kind') != 'asr':
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            # Build one translated-caption URL per available target language.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1187
1188 def _print_formats(self, formats):
1189 print('Available formats:')
1190 for x in formats:
1191 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1192 self._video_dimensions.get(x, '???'),
1193 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1194
1195 def _extract_id(self, url):
1196 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1197 if mobj is None:
1198 raise ExtractorError(u'Invalid URL: %s' % url)
1199 video_id = mobj.group(2)
1200 return video_id
1201
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Returns None after printing the table when 'listformats' is set.
        Raises ExtractorError when no known or no requested format is present.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Drop every format ranked above the requested limit.
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        # Keep only the formats this video actually provides.
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])]  # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])]  # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats]  # All formats
        else:
            # Specific formats. We pick the first in a slash-delimited sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    # rf names a format group (e.g. 'mp4'); take its first
                    # available member.  The for/else continues the outer loop
                    # only when none of the group's itags is available.
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        continue
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1250
1251 def _extract_from_m3u8(self, manifest_url, video_id):
1252 url_map = {}
1253 def _get_urls(_manifest):
1254 lines = _manifest.split('\n')
1255 urls = filter(lambda l: l and not l.startswith('#'),
1256 lines)
1257 return urls
1258 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1259 formats_urls = _get_urls(manifest)
1260 for format_url in formats_urls:
1261 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1262 url_map[itag] = format_url
1263 return url_map
1264
    def _real_extract(self, url):
        """Extract metadata and download URLs for a single video.

        Returns a list with one info dict per selected format, or None when
        only listing formats/subtitles.
        """
        # Catch a common shell-quoting mistake ('&' truncating the URL).
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
        try:
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # Un-escape the backslash-escaped URL found in the page's JS.
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'el': 'embedded',
                                                  'gl': 'US',
                                                  'hl': 'en',
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                                                  'asv': 3,
                                                  'sts': '1588',
                                                  })
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                        note=False,
                                                        errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
            # Try several 'el' values until one of them yields a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                                  % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                            note=False,
                                                            errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
            else:
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
        else:
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
            video_thumbnail = ''
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = None
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            # Normalize separators to spaces before parsing the date.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
        else:
            # Fall back to the <meta name="description"> tag.
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = u''

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, video_webpage)
            return

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = ''
        else:
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # Decide which formats to download

        # Merge the stream maps from the page's ytplayer.config into
        # video_info; the ValueError raised below only aborts this
        # optional merge (caught by the except clause).
        try:
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            if not mobj:
                raise ValueError('Could not find vevo ID')
            info = json.loads(mobj.group(1))
            args = info['args']
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
            if m_s is not None:
                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
            if m_s is not None:
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                else:
                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
            elif 'adaptive_fmts' in video_info:
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                else:
                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
        except ValueError:
            pass

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Signature already in the clear.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        encrypted_sig = url_data['s'][0]
                        if self._downloader.params.get('verbose'):
                            # Describe which player serves this signature,
                            # for debugging signature extraction.
                            if age_gate:
                                if player_url is None:
                                    player_version = 'unknown'
                                else:
                                    player_version = self._search_regex(
                                        r'-(.+)\.swf$', player_url,
                                        u'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
                                player_desc = u'html5 player %s' % player_version

                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                           (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                        if not age_gate:
                            # Non-age-gated pages declare the JS player used
                            # to decrypt; override the SWF URL found earlier.
                            jsplayer_url_json = self._search_regex(
                                r'"assets":.+?"js":\s*("[^"]+")',
                                video_webpage, u'JS player URL')
                            player_url = json.loads(jsplayer_url_json)

                        signature = self._decrypt_signature(
                            encrypted_sig, video_id, player_url, age_gate)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                return
        elif video_info.get('hlsvp'):
            # Fall back to the HLS (m3u8) manifest when present.
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                return

        else:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

        results = []
        for format_param, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                 self._video_dimensions.get(format_param, '???'),
                                                 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')

            results.append({
                'id': video_id,
                'url': video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date': upload_date,
                'title': video_title,
                'ext': video_extension,
                'format': video_format,
                'thumbnail': video_thumbnail,
                'description': video_description,
                'player_url': player_url,
                'subtitles': video_subtitles,
                'duration': video_duration
            })
        return results
1510
class YoutubePlaylistIE(InfoExtractor):
    """Extractor for YouTube playlists, paged through the gdata API."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # presumably the API serves no results past index 1000 —
                # stop with a warning rather than erroring out.
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Sort by playlist position, then drop the position.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1578
1579
class YoutubeChannelIE(InfoExtractor):
    """Extractor for all videos of a YouTube channel."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker whose presence in a page indicates more pages can be loaded.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the distinct video ids linked from *page*, in page order."""
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        pagenum = 1

        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                # The ajax endpoint returns JSON with the HTML embedded.
                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
1634
1635
class YoutubeUserIE(InfoExtractor):
    """Extractor for all uploads of a YouTube user, via the gdata API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Our regex is very permissive, so defer to any other youtube
        # extractor that claims this URL first.
        other_ies = (klass for (name, klass) in globals().items()
                     if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist result with every upload of the given user."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # The gdata API caps each response at _GDATA_PAGE_SIZE results, so
        # request successive pages until one comes back short.
        video_ids = []
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Each entry id ends with the video id after the last '/'.
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not full must be the last one — no need to
            # query again.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1700
class YoutubeSearchIE(SearchInfoExtractor):
    """Extractor for 'ytsearchN:query' searches, via the gdata API."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n

        # Each API page holds at most 50 results; keep paging until we have
        # enough ids or the API reports fewer total items than requested.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            # idiom fix: use 'not in' rather than 'not ... in'
            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids.extend(video['id'] for video in api_response['items'])

            # Never ask for more than the API says exists.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
1742
1743
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist url_result per season of the show."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            results.append(self.url_result(
                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
1757
1758
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Offset increment between successive feed_ajax requests.
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template with a single '%s' left open for the paging offset.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # orderedSet deduplicates the ids while keeping feed order.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # presumably a null 'paging' value marks the last page — verify
            # against the feed_ajax response format.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1800
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1806
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1812
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's watch-later list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Larger page size and the personal (per-user) feed_ajax action.
    _PAGING_STEP = 100
    _PERSONAL_FEED = True
1820
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourite videos."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it;
        # delegate the actual extraction to YoutubePlaylistIE.
        webpage = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')