]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
Change test target (Verified with node.js)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
e0df6211
PH
3import collections
4import itertools
5import io
c5e8d7af
PH
6import json
7import netrc
8import re
9import socket
e0df6211
PH
10import string
11import struct
12import traceback
13import zlib
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
54d39d8b 16from .subtitles import SubtitlesInfoExtractor
c5e8d7af
PH
17from ..utils import (
18 compat_http_client,
19 compat_parse_qs,
20 compat_urllib_error,
21 compat_urllib_parse,
22 compat_urllib_request,
23 compat_str,
24
25 clean_html,
26 get_element_by_id,
27 ExtractorError,
28 unescapeHTML,
29 unified_strdate,
04cc9617 30 orderedSet,
c5e8d7af
PH
31)
32
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request _LANG_URL so that later pages are served in English.

        Returns True on success.  Network errors are reported as a warning
        and turned into a False return value.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in with the credentials returned by _get_login_info().

        Returns True when the login succeeded and False when no credentials
        are configured, a request failed, or the login form is shown again.
        Raises ExtractorError when no credentials are available although
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Hidden form fields that have to be echoed back with the credentials
        galx = None
        dsh = None
        match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            galx = match.group(1)
        match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'dsh': dsh,
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode.  Fields whose value could not be determined
        # (e.g. a hidden input missing from the login page) are left out
        # instead of crashing on None.encode().
        login_form = dict(
            (k.encode('utf-8'), v.encode('utf-8'))
            for k, v in login_form_strs.items()
            if v is not None)
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Being served the login form again means the login was rejected
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Submit the age confirmation form to _AGE_URL.

        Returns True on success and raises ExtractorError when the request
        fails.
        """
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        # The POST body has to be bytes (Python 3 refuses a str body), so it
        # is encoded the same way as the login form above.
        age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
        request = compat_urllib_request.Request(self._AGE_URL, age_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set the language, log in and confirm age, stopping at the first step that fails."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
c5e8d7af 139
8377574c 140
de7f3446 141class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
0f818663 142 IE_DESC = u'YouTube.com'
c5e8d7af
PH
143 _VALID_URL = r"""^
144 (
145 (?:https?://)? # http(s):// (optional)
f4b05232 146 (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
e69ae5b9
JMF
147 tube\.majestyc\.net/|
148 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
149 (?:.*?\#/)? # handle anchor (#/) redirect urls
150 (?: # the various things that can precede the ID:
151 (?:(?:v|embed|e)/) # v/ or embed/ or e/
152 |(?: # or the v= param in all its forms
d741e55a 153 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af
PH
154 (?:\?|\#!?) # the params delimiter ? or # or #!
155 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
156 v=
157 )
f4b05232
JMF
158 ))
159 |youtu\.be/ # just youtu.be/xxxx
160 )
c5e8d7af 161 )? # all until now is optional -> you can pass the naked ID
8963d9c2 162 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af
PH
163 (?(1).+)? # if we found the ID, everything can follow
164 $"""
c5e8d7af 165 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
c5e8d7af 166 # Listed in order of quality
bdc6b3fc 167 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
96fb5605 168 # Apple HTTP Live Streaming
bdc6b3fc 169 '96', '95', '94', '93', '92', '132', '151',
939fbd26
JMF
170 # 3D
171 '85', '84', '102', '83', '101', '82', '100',
172 # Dash video
173 '138', '137', '248', '136', '247', '135', '246',
174 '245', '244', '134', '243', '133', '242', '160',
175 # Dash audio
176 '141', '172', '140', '171', '139',
1d043b93 177 ]
bdc6b3fc 178 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
96fb5605 179 # Apple HTTP Live Streaming
bdc6b3fc
AZ
180 '96', '95', '94', '93', '92', '132', '151',
181 # 3D
86fe61c8 182 '85', '102', '84', '101', '83', '100', '82',
939fbd26
JMF
183 # Dash video
184 '138', '248', '137', '247', '136', '246', '245',
185 '244', '135', '243', '134', '242', '133', '160',
186 # Dash audio
187 '172', '141', '171', '140', '139',
1d043b93 188 ]
bdc6b3fc
AZ
189 _video_formats_map = {
190 'flv': ['35', '34', '6', '5'],
191 '3gp': ['36', '17', '13'],
192 'mp4': ['38', '37', '22', '18'],
193 'webm': ['46', '45', '44', '43'],
194 }
c5e8d7af
PH
195 _video_extensions = {
196 '13': '3gp',
bdc6b3fc 197 '17': '3gp',
c5e8d7af
PH
198 '18': 'mp4',
199 '22': 'mp4',
bdc6b3fc 200 '36': '3gp',
c5e8d7af 201 '37': 'mp4',
d69cf69a 202 '38': 'mp4',
c5e8d7af
PH
203 '43': 'webm',
204 '44': 'webm',
205 '45': 'webm',
206 '46': 'webm',
1d043b93 207
86fe61c8
AZ
208 # 3d videos
209 '82': 'mp4',
210 '83': 'mp4',
211 '84': 'mp4',
212 '85': 'mp4',
213 '100': 'webm',
214 '101': 'webm',
215 '102': 'webm',
836a086c 216
96fb5605 217 # Apple HTTP Live Streaming
1d043b93
JMF
218 '92': 'mp4',
219 '93': 'mp4',
220 '94': 'mp4',
221 '95': 'mp4',
222 '96': 'mp4',
223 '132': 'mp4',
224 '151': 'mp4',
836a086c
AZ
225
226 # Dash mp4
227 '133': 'mp4',
228 '134': 'mp4',
229 '135': 'mp4',
230 '136': 'mp4',
231 '137': 'mp4',
232 '138': 'mp4',
233 '139': 'mp4',
234 '140': 'mp4',
235 '141': 'mp4',
236 '160': 'mp4',
237
238 # Dash webm
239 '171': 'webm',
240 '172': 'webm',
241 '242': 'webm',
242 '243': 'webm',
243 '244': 'webm',
244 '245': 'webm',
245 '246': 'webm',
246 '247': 'webm',
247 '248': 'webm',
c5e8d7af
PH
248 }
249 _video_dimensions = {
250 '5': '240x400',
251 '6': '???',
252 '13': '???',
253 '17': '144x176',
254 '18': '360x640',
255 '22': '720x1280',
256 '34': '360x640',
257 '35': '480x854',
bdc6b3fc 258 '36': '240x320',
c5e8d7af
PH
259 '37': '1080x1920',
260 '38': '3072x4096',
261 '43': '360x640',
262 '44': '480x854',
263 '45': '720x1280',
264 '46': '1080x1920',
86fe61c8
AZ
265 '82': '360p',
266 '83': '480p',
267 '84': '720p',
268 '85': '1080p',
1d043b93
JMF
269 '92': '240p',
270 '93': '360p',
271 '94': '480p',
272 '95': '720p',
273 '96': '1080p',
86fe61c8
AZ
274 '100': '360p',
275 '101': '480p',
836a086c 276 '102': '720p',
1d043b93
JMF
277 '132': '240p',
278 '151': '72p',
836a086c
AZ
279 '133': '240p',
280 '134': '360p',
281 '135': '480p',
282 '136': '720p',
283 '137': '1080p',
284 '138': '>1080p',
285 '139': '48k',
286 '140': '128k',
287 '141': '256k',
288 '160': '192p',
289 '171': '128k',
290 '172': '256k',
291 '242': '240p',
292 '243': '360p',
293 '244': '480p',
294 '245': '480p',
295 '246': '480p',
296 '247': '720p',
297 '248': '1080p',
c5e8d7af 298 }
836a086c
AZ
299 _special_itags = {
300 '82': '3D',
301 '83': '3D',
302 '84': '3D',
303 '85': '3D',
304 '100': '3D',
305 '101': '3D',
306 '102': '3D',
307 '133': 'DASH Video',
308 '134': 'DASH Video',
309 '135': 'DASH Video',
310 '136': 'DASH Video',
311 '137': 'DASH Video',
312 '138': 'DASH Video',
313 '139': 'DASH Audio',
314 '140': 'DASH Audio',
315 '141': 'DASH Audio',
316 '160': 'DASH Video',
317 '171': 'DASH Audio',
318 '172': 'DASH Audio',
319 '242': 'DASH Video',
320 '243': 'DASH Video',
321 '244': 'DASH Video',
322 '245': 'DASH Video',
323 '246': 'DASH Video',
324 '247': 'DASH Video',
325 '248': 'DASH Video',
c5e8d7af 326 }
836a086c 327
c5e8d7af 328 IE_NAME = u'youtube'
2eb88d95
PH
329 _TESTS = [
330 {
0e853ca4
PH
331 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
332 u"file": u"BaW_jenozKc.mp4",
333 u"info_dict": {
334 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
335 u"uploader": u"Philipp Hagemeister",
336 u"uploader_id": u"phihag",
337 u"upload_date": u"20121002",
338 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
2eb88d95 339 }
0e853ca4
PH
340 },
341 {
342 u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
343 u"file": u"1ltcDfZMA3U.flv",
344 u"note": u"Test VEVO video (#897)",
345 u"info_dict": {
346 u"upload_date": u"20070518",
347 u"title": u"Maps - It Will Find You",
348 u"description": u"Music video by Maps performing It Will Find You.",
349 u"uploader": u"MuteUSA",
350 u"uploader_id": u"MuteUSA"
2eb88d95 351 }
0e853ca4
PH
352 },
353 {
354 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
355 u"file": u"UxxajLWwzqY.mp4",
356 u"note": u"Test generic use_cipher_signature video (#897)",
357 u"info_dict": {
358 u"upload_date": u"20120506",
359 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
c7bf7366 360 u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
45ed795c 361 u"uploader": u"Icona Pop",
0e853ca4 362 u"uploader_id": u"IconaPop"
2eb88d95 363 }
c108eb73
JMF
364 },
365 {
366 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
367 u"file": u"07FYdnEawAQ.mp4",
368 u"note": u"Test VEVO video with age protection (#956)",
369 u"info_dict": {
370 u"upload_date": u"20130703",
371 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
372 u"description": u"md5:64249768eec3bc4276236606ea996373",
373 u"uploader": u"justintimberlakeVEVO",
374 u"uploader_id": u"justintimberlakeVEVO"
375 }
376 },
1d043b93
JMF
377 {
378 u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
379 u'file': u'TGi3HqYrWHE.mp4',
380 u'note': u'm3u8 video',
381 u'info_dict': {
382 u'title': u'Triathlon - Men - London 2012 Olympic Games',
383 u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
384 u'uploader': u'olympic',
385 u'upload_date': u'20120807',
386 u'uploader_id': u'olympic',
387 },
388 u'params': {
389 u'skip_download': True,
390 },
391 },
2eb88d95
PH
392 ]
393
c5e8d7af
PH
394
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs accepted by YoutubePlaylistIE are rejected first; anything else is
    matched against _VALID_URL in verbose-regex mode.
    """
    if YoutubePlaylistIE.suitable(url):
        return False
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    return matched is not None
400
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps a player URL to the signature function extracted from that
    # player; filled lazily by _decrypt_signature.
    self._jsplayer_cache = {}
404
c5e8d7af
PH
def report_video_webpage_download(self, video_id):
    """Print a notice that the video webpage of *video_id* is being downloaded."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
408
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage of *video_id* is being downloaded."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
412
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a notice that information about *video_id* is being extracted."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
416
def report_unavailable_format(self, video_id, format):
    """Print a notice that *format* is not available for *video_id*."""
    details = (video_id, format)
    self.to_screen(u'%s: Format %s not available' % details)
420
def report_rtmp_download(self):
    """Print a notice that the download will use the RTMP protocol."""
    notice = u'RTMP download detected'
    self.to_screen(notice)
424
e0df6211
PH
425 def _extract_signature_function(self, video_id, player_url):
426 id_m = re.match(r'.*-(?P<id>[^.]+)\.(?P<ext>[^.]+)$', player_url)
427 player_type = id_m.group('ext')
428 player_id = id_m.group('id')
429
430 if player_type == 'js':
431 code = self._download_webpage(
432 player_url, video_id,
433 note=u'Downloading %s player %s' % (player_type, jsplayer_id),
434 errnote=u'Download of %s failed' % player_url)
435 return self._parse_sig_js(code)
436 elif player_tpye == 'swf':
437 urlh = self._request_webpage(
438 player_url, video_id,
439 note=u'Downloading %s player %s' % (player_type, jsplayer_id),
440 errnote=u'Download of %s failed' % player_url)
441 code = urlh.read()
442 return self._parse_sig_swf(code)
443 else:
444 assert False, 'Invalid player type %r' % player_type
445
446 def _parse_sig_js(self, jscode):
447 funcname = self._search_regex(
448 r'signature=([a-zA-Z]+)', jscode,
449 u'Initial JS player signature function name')
450
451 functions = {}
452
453 def argidx(varname):
454 return string.lowercase.index(varname)
455
456 def interpret_statement(stmt, local_vars, allow_recursion=20):
457 if allow_recursion < 0:
458 raise ExctractorError(u'Recursion limit reached')
459
460 if stmt.startswith(u'var '):
461 stmt = stmt[len(u'var '):]
462 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
463 r'=(?P<expr>.*)$', stmt)
464 if ass_m:
465 if ass_m.groupdict().get('index'):
466 def assign(val):
467 lvar = local_vars[ass_m.group('out')]
468 idx = interpret_expression(ass_m.group('index'),
469 local_vars, allow_recursion)
470 assert isinstance(idx, int)
471 lvar[idx] = val
472 return val
473 expr = ass_m.group('expr')
474 else:
475 def assign(val):
476 local_vars[ass_m.group('out')] = val
477 return val
478 expr = ass_m.group('expr')
479 elif stmt.startswith(u'return '):
480 assign = lambda v: v
481 expr = stmt[len(u'return '):]
482 else:
483 raise ExtractorError(
484 u'Cannot determine left side of statement in %r' % stmt)
485
486 v = interpret_expression(expr, local_vars, allow_recursion)
487 return assign(v)
488
489 def interpret_expression(expr, local_vars, allow_recursion):
490 if expr.isdigit():
491 return int(expr)
492
493 if expr.isalpha():
494 return local_vars[expr]
495
496 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
497 if m:
498 member = m.group('member')
499 val = local_vars[m.group('in')]
500 if member == 'split("")':
501 return list(val)
502 if member == 'join("")':
503 return u''.join(val)
504 if member == 'length':
505 return len(val)
506 if member == 'reverse()':
507 return val[::-1]
508 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
509 if slice_m:
510 idx = interpret_expression(
511 slice_m.group('idx'), local_vars, allow_recursion-1)
512 return val[idx:]
513
514 m = re.match(
515 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
516 if m:
517 val = local_vars[m.group('in')]
518 idx = interpret_expression(m.group('idx'), local_vars,
519 allow_recursion-1)
520 return val[idx]
521
522 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
523 if m:
524 a = interpret_expression(m.group('a'),
525 local_vars, allow_recursion)
526 b = interpret_expression(m.group('b'),
527 local_vars, allow_recursion)
528 return a % b
529
530 m = re.match(
531 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
532 if m:
533 fname = m.group('func')
534 if fname not in functions:
535 functions[fname] = extract_function(fname)
536 argvals = [int(v) if v.isdigit() else local_vars[v]
537 for v in m.group('args').split(',')]
538 return functions[fname](argvals)
539 raise ExtractorError(u'Unsupported JS expression %r' % expr)
540
541 def extract_function(funcname):
542 func_m = re.search(
543 r'function ' + re.escape(funcname) +
544 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
545 jscode)
546 argnames = func_m.group('args').split(',')
547
548 def resf(args):
549 local_vars = dict(zip(argnames, args))
550 for stmt in func_m.group('code').split(';'):
551 res = interpret_statement(stmt, local_vars)
552 return res
553 return resf
554
555 initial_function = extract_function(funcname)
556 return lambda s: initial_function([s])
557
558 def _parse_sig_swf(self, file_contents):
559 if file_contents[1:3] != b'WS':
560 raise ExtractorError(
561 u'Not an SWF file; header is %r' % file_contents[:3])
562 if file_contents[:1] == b'C':
563 content = zlib.decompress(file_contents[8:])
564 else:
565 raise NotImplementedError(u'Unsupported compression format %r' %
566 file_contents[:1])
567
568 def extract_tags(content):
569 pos = 0
570 while pos < len(content):
571 header16 = struct.unpack('<H', content[pos:pos+2])[0]
572 pos += 2
573 tag_code = header16 >> 6
574 tag_len = header16 & 0x3f
575 if tag_len == 0x3f:
576 tag_len = struct.unpack('<I', content[pos:pos+4])[0]
577 pos += 4
578 assert pos+tag_len <= len(content)
579 yield (tag_code, content[pos:pos+tag_len])
580 pos += tag_len
581
582 code_tag = next(tag
583 for tag_code, tag in extract_tags(content)
584 if tag_code == 82)
585 p = code_tag.index(b'\0', 4) + 1
586
587 # Parse ABC (AVM2 ByteCode)
588 def read_int(data=None, pos=None):
589 if hasattr(data, 'read'):
590 assert pos is None
591
592 res = 0
593 shift = 0
594 for _ in range(5):
595 buf = data.read(1)
596 assert len(buf) == 1
597 b = struct.unpack('<B', buf)[0]
598 res = res | ((b & 0x7f) << shift)
599 if b & 0x80 == 0:
600 break
601 shift += 7
602 return res
603
604 if data is None:
605 data = code_tag
606 if pos is None:
607 pos = p
608 res = 0
609 shift = 0
610 for _ in range(5):
611 b = struct.unpack('<B', data[pos:pos+1])[0]
612 pos += 1
613 res = res | ((b & 0x7f) << shift)
614 if b & 0x80 == 0:
615 break
616 shift += 7
617 return (res, pos)
618 assert read_int(b'\x00', 0) == (0, 1)
619 assert read_int(b'\x10', 0) == (16, 1)
620 assert read_int(b'\x34', 0) == (0x34, 1)
621 assert read_int(b'\xb4\x12', 0) == (0x12 * 0x80 + 0x34, 2)
622 assert read_int(b'\xff\xff\xff\x00', 0) == (0x1fffff, 4)
623
624 def u30(*args, **kwargs):
625 res = read_int(*args, **kwargs)
626 if isinstance(res, tuple):
627 assert res[0] & 0xf0000000 == 0
628 else:
629 assert res & 0xf0000000 == 0
630 return res
631 u32 = read_int
632
633 def s32(data=None, pos=None):
634 v, pos = read_int(data, pos)
635 if v & 0x80000000 != 0:
636 v = - ((v ^ 0xffffffff) + 1)
637 return (v, pos)
638 assert s32(b'\xff\xff\xff\xff\x0f', 0) == (-1, 5)
639
640 def string():
641 slen, p = u30()
642 return (code_tag[p:p+slen].decode('utf-8'), p + slen)
643
644 def read_byte(data=None, pos=None):
645 if data is None:
646 data = code_tag
647 if pos is None:
648 pos = p
649 res = struct.unpack('<B', data[pos:pos+1])[0]
650 return (res, pos + 1)
651
652 # minor_version + major_version
653 p += 2 + 2
654
655 # Constant pool
656 int_count, p = u30()
657 for _c in range(1, int_count):
658 _, p = s32()
659 uint_count, p = u30()
660 for _c in range(1, uint_count):
661 _, p = u32()
662 double_count, p = u30()
663 p += (double_count-1) * 8
664 string_count, p = u30()
665 constant_strings = [u'']
666 for _c in range(1, string_count):
667 s, p = string()
668 constant_strings.append(s)
669 namespace_count, p = u30()
670 for _c in range(1, namespace_count):
671 p += 1 # kind
672 _, p = u30() # name
673 ns_set_count, p = u30()
674 for _c in range(1, ns_set_count):
675 count, p = u30()
676 for _c2 in range(count):
677 _, p = u30()
678 multiname_count, p = u30()
679 MULTINAME_SIZES = {
680 0x07: 2, # QName
681 0x0d: 2, # QNameA
682 0x0f: 1, # RTQName
683 0x10: 1, # RTQNameA
684 0x11: 0, # RTQNameL
685 0x12: 0, # RTQNameLA
686 0x09: 2, # Multiname
687 0x0e: 2, # MultinameA
688 0x1b: 1, # MultinameL
689 0x1c: 1, # MultinameLA
690 }
691 multinames = [u'']
692 for _c in range(1, multiname_count):
693 kind, p = u30()
694 assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
695 if kind == 0x07:
696 namespace_idx, p = u30()
697 name_idx, p = u30()
698 multinames.append(constant_strings[name_idx])
699 else:
700 multinames.append('[MULTINAME kind: %d]' % kind)
701 for _c2 in range(MULTINAME_SIZES[kind]):
702 _, p = u30()
703
704 # Methods
705 method_count, p = u30()
706 MethodInfo = collections.namedtuple(
707 'MethodInfo',
708 ['NEED_ARGUMENTS', 'NEED_REST'])
709 method_infos = []
710 for method_id in range(method_count):
711 param_count, p = u30()
712 _, p = u30() # return type
713 for _ in range(param_count):
714 _, p = u30() # param type
715 _, p = u30() # name index (always 0 for youtube)
716 flags, p = read_byte()
717 if flags & 0x08 != 0:
718 # Options present
719 option_count, p = u30()
720 for c in range(option_count):
721 _, p = u30() # val
722 p += 1 # kind
723 if flags & 0x80 != 0:
724 # Param names present
725 for _ in range(param_count):
726 _, p = u30() # param name
727 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
728 method_infos.append(mi)
729
730 # Metadata
731 metadata_count, p = u30()
732 for _c in range(metadata_count):
733 _, p = u30() # name
734 item_count, p = u30()
735 for _c2 in range(item_count):
736 _, p = u30() # key
737 _, p = u30() # value
738
739 def parse_traits_info(pos=None):
740 if pos is None:
741 pos = p
742 trait_name_idx, pos = u30(pos=pos)
743 kind_full, pos = read_byte(pos=pos)
744 kind = kind_full & 0x0f
745 attrs = kind_full >> 4
746 methods = {}
747 if kind in [0x00, 0x06]: # Slot or Const
748 _, pos = u30(pos=pos) # Slot id
749 type_name_idx, pos = u30(pos=pos)
750 vindex, pos = u30(pos=pos)
751 if vindex != 0:
752 _, pos = read_byte(pos=pos) # vkind
753 elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
754 _, pos = u30(pos=pos) # disp_id
755 method_idx, pos = u30(pos=pos)
756 methods[multinames[trait_name_idx]] = method_idx
757 elif kind == 0x04: # Class
758 _, pos = u30(pos=pos) # slot_id
759 _, pos = u30(pos=pos) # classi
760 elif kind == 0x05: # Function
761 _, pos = u30(pos=pos) # slot_id
762 function_idx, pos = u30(pos=pos)
763 methods[function_idx] = multinames[trait_name_idx]
764 else:
765 raise ExtractorError(u'Unsupported trait kind %d' % kind)
766
767 if attrs & 0x4 != 0: # Metadata present
768 metadata_count, pos = u30(pos=pos)
769 for _c3 in range(metadata_count):
770 _, pos = u30(pos=pos)
771
772 return (methods, pos)
773
774 # Classes
775 TARGET_CLASSNAME = u'SignatureDecipher'
776 searched_idx = multinames.index(TARGET_CLASSNAME)
777 searched_class_id = None
778 class_count, p = u30()
779 for class_id in range(class_count):
780 name_idx, p = u30()
781 if name_idx == searched_idx:
782 # We found the class we're looking for!
783 searched_class_id = class_id
784 _, p = u30() # super_name idx
785 flags, p = read_byte()
786 if flags & 0x08 != 0: # Protected namespace is present
787 protected_ns_idx, p = u30()
788 intrf_count, p = u30()
789 for _c2 in range(intrf_count):
790 _, p = u30()
791 _, p = u30() # iinit
792 trait_count, p = u30()
793 for _c2 in range(trait_count):
794 _, p = parse_traits_info()
795
796 if searched_class_id is None:
797 raise ExtractorError(u'Target class %r not found' %
798 TARGET_CLASSNAME)
799
800 method_names = {}
801 method_idxs = {}
802 for class_id in range(class_count):
803 _, p = u30() # cinit
804 trait_count, p = u30()
805 for _c2 in range(trait_count):
806 trait_methods, p = parse_traits_info()
807 if class_id == searched_class_id:
808 method_names.update(trait_methods.items())
809 method_idxs.update(dict(
810 (idx, name)
811 for name, idx in trait_methods.items()))
812
813 # Scripts
814 script_count, p = u30()
815 for _c in range(script_count):
816 _, p = u30() # init
817 trait_count, p = u30()
818 for _c2 in range(trait_count):
819 _, p = parse_traits_info()
820
821 # Method bodies
822 method_body_count, p = u30()
823 Method = collections.namedtuple('Method', ['code', 'local_count'])
824 methods = {}
825 for _c in range(method_body_count):
826 method_idx, p = u30()
827 max_stack, p = u30()
828 local_count, p = u30()
829 init_scope_depth, p = u30()
830 max_scope_depth, p = u30()
831 code_length, p = u30()
832 if method_idx in method_idxs:
833 m = Method(code_tag[p:p+code_length], local_count)
834 methods[method_idxs[method_idx]] = m
835 p += code_length
836 exception_count, p = u30()
837 for _c2 in range(exception_count):
838 _, p = u30() # from
839 _, p = u30() # to
840 _, p = u30() # target
841 _, p = u30() # exc_type
842 _, p = u30() # var_name
843 trait_count, p = u30()
844 for _c2 in range(trait_count):
845 _, p = parse_traits_info()
846
847 assert p == len(code_tag)
848 assert len(methods) == len(method_idxs)
849
850 method_pyfunctions = {}
851
852 def extract_function(func_name):
853 if func_name in method_pyfunctions:
854 return method_pyfunctions[func_name]
855 if func_name not in methods:
856 raise ExtractorError(u'Cannot find function %r' % func_name)
857 m = methods[func_name]
858
859 def resfunc(args):
e0df6211
PH
860 registers = ['(this)'] + list(args) + [None] * m.local_count
861 stack = []
862 coder = io.BytesIO(m.code)
863 while True:
864 opcode = struct.unpack('!B', coder.read(1))[0]
a7177865 865 if opcode == 36: # pushbyte
e0df6211
PH
866 v = struct.unpack('!B', coder.read(1))[0]
867 stack.append(v)
868 elif opcode == 44: # pushstring
869 idx = u30(coder)
870 stack.append(constant_strings[idx])
871 elif opcode == 48: # pushscope
872 # We don't implement the scope register, so we'll just
873 # ignore the popped value
874 stack.pop()
875 elif opcode == 70: # callproperty
876 index = u30(coder)
877 mname = multinames[index]
878 arg_count = u30(coder)
879 args = list(reversed(
880 [stack.pop() for _ in range(arg_count)]))
881 obj = stack.pop()
882 if mname == u'split':
883 assert len(args) == 1
884 assert isinstance(args[0], compat_str)
885 assert isinstance(obj, compat_str)
886 if args[0] == u'':
887 res = list(obj)
888 else:
889 res = obj.split(args[0])
890 stack.append(res)
a7177865
PH
891 elif mname == u'slice':
892 assert len(args) == 1
893 assert isinstance(args[0], int)
894 assert isinstance(obj, list)
895 res = obj[args[0]:]
896 stack.append(res)
897 elif mname == u'join':
898 assert len(args) == 1
899 assert isinstance(args[0], compat_str)
900 assert isinstance(obj, list)
901 res = args[0].join(obj)
902 stack.append(res)
e0df6211
PH
903 elif mname in method_pyfunctions:
904 stack.append(method_pyfunctions[mname](args))
905 else:
906 raise NotImplementedError(
907 u'Unsupported property %r on %r'
908 % (mname, obj))
a7177865
PH
909 elif opcode == 72: # returnvalue
910 res = stack.pop()
911 return res
912 elif opcode == 79: # callpropvoid
913 index = u30(coder)
914 mname = multinames[index]
915 arg_count = u30(coder)
916 args = list(reversed(
917 [stack.pop() for _ in range(arg_count)]))
918 obj = stack.pop()
919 if mname == u'reverse':
920 assert isinstance(obj, list)
921 obj.reverse()
922 else:
923 raise NotImplementedError(
924 u'Unsupported (void) property %r on %r'
925 % (mname, obj))
e0df6211
PH
926 elif opcode == 93: # findpropstrict
927 index = u30(coder)
928 mname = multinames[index]
929 res = extract_function(mname)
930 stack.append(res)
931 elif opcode == 97: # setproperty
932 index = u30(coder)
933 value = stack.pop()
934 idx = stack.pop()
935 obj = stack.pop()
936 assert isinstance(obj, list)
937 assert isinstance(idx, int)
938 obj[idx] = value
939 elif opcode == 98: # getlocal
940 index = u30(coder)
941 stack.append(registers[index])
942 elif opcode == 99: # setlocal
943 index = u30(coder)
944 value = stack.pop()
945 registers[index] = value
946 elif opcode == 102: # getproperty
947 index = u30(coder)
948 pname = multinames[index]
949 if pname == u'length':
950 obj = stack.pop()
951 assert isinstance(obj, list)
952 stack.append(len(obj))
953 else: # Assume attribute access
954 idx = stack.pop()
955 assert isinstance(idx, int)
956 obj = stack.pop()
957 assert isinstance(obj, list)
958 stack.append(obj[idx])
959 elif opcode == 128: # coerce
960 _ = u30(coder)
961 elif opcode == 133: # coerce_s
962 assert isinstance(stack[-1], (type(None), compat_str))
963 elif opcode == 164: # modulo
964 value2 = stack.pop()
965 value1 = stack.pop()
966 res = value1 % value2
967 stack.append(res)
a7177865
PH
968 elif opcode == 208: # getlocal_0
969 stack.append(registers[0])
970 elif opcode == 209: # getlocal_1
971 stack.append(registers[1])
972 elif opcode == 210: # getlocal_2
973 stack.append(registers[2])
974 elif opcode == 211: # getlocal_3
975 stack.append(registers[3])
e0df6211
PH
976 elif opcode == 214: # setlocal_2
977 registers[2] = stack.pop()
978 elif opcode == 215: # setlocal_3
979 registers[3] = stack.pop()
980 else:
981 raise NotImplementedError(
982 u'Unsupported opcode %d' % opcode)
983
984 method_pyfunctions[func_name] = resfunc
985 return resfunc
986
987 initial_function = extract_function(u'decipher')
988 return lambda s: initial_function([s])
989
990 def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False):
257a2501 991 """Turn the encrypted s field into a working signature"""
6b37f0be 992
e0df6211
PH
993 if jsplayer_url is not None:
994 try:
995 if jsplayer_url not in self._jsplayer_cache:
996 self._jsplayer_cache[jsplayer_url] = self._extract_signature_function(
997 video_id, jsplayer_url
998 )
999 return self._jsplayer_cache[jsplayer_url]([s])
1000 except Exception as e:
1001 tb = traceback.format_exc()
1002 self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb)
1003
1004 self._downloader.report_warning(u'Warning: Falling back to static signature algorithm')
1005
1006 if age_gate:
1007 # The videos with age protection use another player, so the
1008 # algorithms can be different.
1009 if len(s) == 86:
1010 return s[2:63] + s[82] + s[64:82] + s[63]
1011
1012 if len(s) == 92:
444b1165
JMF
1013 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1014 elif len(s) == 90:
1015 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
8a9d86a2 1016 elif len(s) == 89:
1017 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
444b1165 1018 elif len(s) == 88:
3e223834 1019 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
be547e1d 1020 elif len(s) == 87:
3a725669 1021 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
be547e1d 1022 elif len(s) == 86:
1cf911bc 1023 return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
be547e1d 1024 elif len(s) == 85:
6ae8ee3f 1025 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
be547e1d 1026 elif len(s) == 84:
23b00bc0 1027 return s[81:36:-1] + s[0] + s[35:2:-1]
be547e1d 1028 elif len(s) == 83:
e1842025 1029 return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
be547e1d 1030 elif len(s) == 82:
ce85f022 1031 return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
be547e1d 1032 elif len(s) == 81:
aedd6bb9 1033 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
066090dd
JMF
1034 elif len(s) == 80:
1035 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
5c468ca8
JMF
1036 elif len(s) == 79:
1037 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
be547e1d
PH
1038
1039 else:
1040 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
c5e8d7af 1041
75952c6e
JMF
1042 def _decrypt_signature_age_gate(self, s):
1043 # The videos with age protection use another player, so the algorithms
1044 # can be different.
1045 if len(s) == 86:
1046 return s[2:63] + s[82] + s[64:82] + s[63]
1047 else:
1048 # Fallback to the other algortihms
b072a9de 1049 return self._decrypt_signature(s)
c5e8d7af 1050
de7f3446 1051 def _get_available_subtitles(self, video_id):
de7f3446 1052 try:
7fad1c63
JMF
1053 sub_list = self._download_webpage(
1054 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1055 video_id, note=False)
1056 except ExtractorError as err:
de7f3446
JMF
1057 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1058 return {}
1059 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1060
1061 sub_lang_list = {}
1062 for l in lang_list:
1063 lang = l[1]
1064 params = compat_urllib_parse.urlencode({
1065 'lang': lang,
1066 'v': video_id,
1067 'fmt': self._downloader.params.get('subtitlesformat'),
1068 })
1069 url = u'http://www.youtube.com/api/timedtext?' + params
1070 sub_lang_list[lang] = url
1071 if not sub_lang_list:
1072 self._downloader.report_warning(u'video doesn\'t have subtitles')
1073 return {}
1074 return sub_lang_list
1075
055e6f36 1076 def _get_available_automatic_caption(self, video_id, webpage):
de7f3446
JMF
1077 """We need the webpage for getting the captions url, pass it as an
1078 argument to speed up the process."""
de7f3446
JMF
1079 sub_format = self._downloader.params.get('subtitlesformat')
1080 self.to_screen(u'%s: Looking for automatic captions' % video_id)
1081 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
055e6f36 1082 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
de7f3446
JMF
1083 if mobj is None:
1084 self._downloader.report_warning(err_msg)
1085 return {}
1086 player_config = json.loads(mobj.group(1))
1087 try:
1088 args = player_config[u'args']
1089 caption_url = args[u'ttsurl']
1090 timestamp = args[u'timestamp']
055e6f36
JMF
1091 # We get the available subtitles
1092 list_params = compat_urllib_parse.urlencode({
1093 'type': 'list',
1094 'tlangs': 1,
1095 'asrs': 1,
de7f3446 1096 })
055e6f36
JMF
1097 list_url = caption_url + '&' + list_params
1098 list_page = self._download_webpage(list_url, video_id)
1099 caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
e3dc22ca
JMF
1100 original_lang_node = caption_list.find('track')
1101 if original_lang_node.attrib.get('kind') != 'asr' :
1102 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1103 return {}
1104 original_lang = original_lang_node.attrib['lang_code']
055e6f36
JMF
1105
1106 sub_lang_list = {}
1107 for lang_node in caption_list.findall('target'):
1108 sub_lang = lang_node.attrib['lang_code']
1109 params = compat_urllib_parse.urlencode({
1110 'lang': original_lang,
1111 'tlang': sub_lang,
1112 'fmt': sub_format,
1113 'ts': timestamp,
1114 'kind': 'asr',
1115 })
1116 sub_lang_list[sub_lang] = caption_url + '&' + params
1117 return sub_lang_list
de7f3446
JMF
1118 # An extractor error can be raise by the download process if there are
1119 # no automatic captions but there are subtitles
1120 except (KeyError, ExtractorError):
1121 self._downloader.report_warning(err_msg)
1122 return {}
1123
c5e8d7af
PH
1124 def _print_formats(self, formats):
1125 print('Available formats:')
1126 for x in formats:
03cc7c20
JMF
1127 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1128 self._video_dimensions.get(x, '???'),
836a086c 1129 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
c5e8d7af
PH
1130
1131 def _extract_id(self, url):
1132 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1133 if mobj is None:
1134 raise ExtractorError(u'Invalid URL: %s' % url)
1135 video_id = mobj.group(2)
1136 return video_id
1137
1d043b93
JMF
1138 def _get_video_url_list(self, url_map):
1139 """
1140 Transform a dictionary in the format {itag:url} to a list of (itag, url)
1141 with the requested formats.
1142 """
1143 req_format = self._downloader.params.get('format', None)
1144 format_limit = self._downloader.params.get('format_limit', None)
1145 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1146 if format_limit is not None and format_limit in available_formats:
1147 format_list = available_formats[available_formats.index(format_limit):]
1148 else:
1149 format_list = available_formats
1150 existing_formats = [x for x in format_list if x in url_map]
1151 if len(existing_formats) == 0:
1152 raise ExtractorError(u'no known formats available for video')
1153 if self._downloader.params.get('listformats', None):
1154 self._print_formats(existing_formats)
1155 return
1156 if req_format is None or req_format == 'best':
1157 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1158 elif req_format == 'worst':
1159 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1160 elif req_format in ('-1', 'all'):
1161 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1162 else:
1163 # Specific formats. We pick the first in a slash-delimeted sequence.
bdc6b3fc
AZ
1164 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1165 # available in the specified format. For example,
1166 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1167 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1168 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1d043b93
JMF
1169 req_formats = req_format.split('/')
1170 video_url_list = None
1171 for rf in req_formats:
1172 if rf in url_map:
1173 video_url_list = [(rf, url_map[rf])]
1174 break
bdc6b3fc
AZ
1175 if rf in self._video_formats_map:
1176 for srf in self._video_formats_map[rf]:
1177 if srf in url_map:
1178 video_url_list = [(srf, url_map[srf])]
1179 break
1180 else:
1181 continue
1182 break
1d043b93
JMF
1183 if video_url_list is None:
1184 raise ExtractorError(u'requested format not available')
1185 return video_url_list
1186
1187 def _extract_from_m3u8(self, manifest_url, video_id):
1188 url_map = {}
1189 def _get_urls(_manifest):
1190 lines = _manifest.split('\n')
1191 urls = filter(lambda l: l and not l.startswith('#'),
1192 lines)
1193 return urls
1194 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1195 formats_urls = _get_urls(manifest)
1196 for format_url in formats_urls:
890f62e8 1197 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1d043b93
JMF
1198 url_map[itag] = format_url
1199 return url_map
1200
def _real_extract(self, url):
    """Extract the metadata and download URL(s) of a single video.

    Downloads the watch page and the get_video_info data, collects
    uploader, title, thumbnail, upload date, description, subtitles and
    duration, then builds an {itag: url} map (decrypting signatures where
    needed) and selects formats via _get_video_url_list.

    Returns a list with one info dict per selected format, or None when
    only a listing was requested ('listsubtitles', or when
    _get_video_url_list returns nothing).  Raises ExtractorError on
    download failures or when mandatory fields are missing.
    """
    if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
        self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)

    # Get video webpage
    self.report_video_webpage_download(video_id)
    url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
    request = compat_urllib_request.Request(url)
    try:
        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

    # Undecodable bytes are dropped rather than failing the extraction.
    video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Remove the backslash escapes (e.g. "\/" becomes "/").
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    # Get video info
    self.report_video_info_webpage_download(video_id)
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        self.report_age_confirmation()
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        data = compat_urllib_parse.urlencode({'video_id': video_id,
                                              'el': 'embedded',
                                              'gl': 'US',
                                              'hl': 'en',
                                              'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                                              'asv': 3,
                                              'sts':'1588',
                                              })
        video_info_url = 'https://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(video_info_url, video_id,
                                note=False,
                                errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
    else:
        age_gate = False
        # Try each "el" variant in turn and stop at the first response
        # that contains a token.
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    note=False,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            if 'token' in video_info:
                break
    if 'token' not in video_info:
        if 'reason' in video_info:
            raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
        else:
            raise ExtractorError(u'"token" parameter not in video info for unknown reason')

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError(u'"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError(u'Unable to extract uploader name')
    video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group(1)
    else:
        self._downloader.report_warning(u'unable to extract uploader nickname')

    # title
    if 'title' not in video_info:
        raise ExtractorError(u'Unable to extract video title')
    video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning(u'unable to extract video thumbnail')
        video_thumbnail = ''
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = None
    mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
    if mobj is not None:
        # Turn '/', ',' and '-' into spaces and collapse whitespace before
        # handing the text to unified_strdate.
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        video_description = clean_html(video_description)
    else:
        # Fall back to the <meta name="description"> tag.
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = u''

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)

    # When only the subtitle listing was requested, nothing is returned.
    if self._downloader.params.get('listsubtitles', False):
        self._list_available_subtitles(video_id, video_webpage)
        return

    if 'length_seconds' not in video_info:
        self._downloader.report_warning(u'unable to extract video duration')
        video_duration = ''
    else:
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

    # Decide which formats to download

    # Prefer the stream maps embedded in the page's ytplayer.config when
    # they carry encrypted signatures; a ValueError (config not found or
    # invalid JSON) leaves video_info untouched.
    try:
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
        if not mobj:
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        args = info['args']
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map:
        # these signatures are encrypted
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
        if m_s is not None:
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        # The adaptive formats are appended to the same comma-separated map.
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
        if m_s is not None:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
            else:
                video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
            else:
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
    except ValueError:
        pass

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        video_url_list = [(None, video_info['conn'][0])]
    elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
        if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        url_map = {}
        # Each comma-separated entry is a query string describing one format.
        for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' in url_data and 'url' in url_data:
                url = url_data['url'][0]
                if 'sig' in url_data:
                    # Plain signature: append as is.
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    # Encrypted signature: must be decrypted first.
                    encrypted_sig = url_data['s'][0]
                    if self._downloader.params.get('verbose'):
                        if age_gate:
                            player_version = self._search_regex(r'-(.+)\.swf$',
                                player_url if player_url else 'NOT FOUND',
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            player_desc = u'html5 player %s' % player_version

                        # Lengths of the dot-separated parts, e.g. "40.40".
                        parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                        self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                            (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                    # No JS player URL is looked up for age-gated videos, so
                    # _decrypt_signature goes straight to its static table.
                    if age_gate:
                        jsplayer_url = None
                    else:
                        jsplayer_url_json = self._search_regex(
                            r'"assets":.+?"js":\s*("[^"]+")',
                            video_webpage, u'JS player URL')
                        jsplayer_url = json.loads(jsplayer_url_json)

                    signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[url_data['itag'][0]] = url
        video_url_list = self._get_video_url_list(url_map)
        # _get_video_url_list returns None after listing the formats.
        if not video_url_list:
            return
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
            return

    else:
        raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

    results = []
    for format_param, video_real_url in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')

        # Human-readable format: "<itag or extension> - <dimensions>[ (<note>)]"
        video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                          self._video_dimensions.get(format_param, '???'),
                                          ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')

        results.append({
            'id':       video_id,
            'url':      video_real_url,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date':  upload_date,
            'title':    video_title,
            'ext':      video_extension,
            'format':   video_format,
            'thumbnail':    video_thumbnail,
            'description':  video_description,
            'player_url':   player_url,
            'subtitles':    video_subtitles,
            'duration':     video_duration
        })
    return results
1443
class YoutubePlaylistIE(InfoExtractor):
    """Extractor for playlists, using the paged gdata playlists feed."""
    IE_DESC = u'YouTube.com playlists'
    IE_NAME = u'youtube:playlist'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose syntax, hence the explicit flag.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Return a one-element list holding the playlist result for url."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The id is in group 1 for full URLs and in group 2 for bare ids.
        playlist_id = mobj.group(1) or mobj.group(2)
        positioned = []

        page_num = 0
        while True:
            page_num += 1
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            api_url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(api_url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            feed = response['feed']
            playlist_title = feed['title']['$t']
            if 'entry' not in feed:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in feed['entry']:
                position = entry['yt$position']['$t']
                # Entries without a video id are skipped.
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    video_url = 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    positioned.append((position, video_url))

        # Order by playlist position, then keep only the URLs.
        positioned.sort()
        url_results = [self.url_result(video_url, 'Youtube') for _position, video_url in positioned]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1511
1512
class YoutubeChannelIE(InfoExtractor):
    """Extractor for channels: scrapes the channel's video list page and the
    subsequent "load more" ajax pages."""
    IE_DESC = u'YouTube.com channels'
    IE_NAME = u'youtube:channel'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from page, without duplicates and in
        order of first appearance."""
        ids_in_page = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id not in ids_in_page:
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        """Return a one-element list holding the playlist result for the channel."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        # First page: the regular HTML channel page.
        pagenum = 1
        page = self._download_webpage(
            self._TEMPLATE_URL % (channel_id, pagenum), channel_id,
            u'Downloading page #%s' % pagenum)
        video_ids = list(self.extract_videos_from_page(page))

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            pagenum = 0
            while True:
                pagenum += 1
                raw = self._download_webpage(
                    self._MORE_PAGES_URL % (pagenum, channel_id), channel_id,
                    u'Downloading page #%s' % pagenum)
                page = json.loads(raw)

                video_ids.extend(self.extract_videos_from_page(page['content_html']))

                # Stop once the response offers no further "load more" widget.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in video_ids]
        return [self.playlist_result(url_entries, channel_id)]
1567
1568
class YoutubeUserIE(InfoExtractor):
    """Extractor for all uploads of a user, using the paged gdata uploads feed."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    IE_NAME = u'youtube:user'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'

    @classmethod
    def suitable(cls, url):
        """Return True only if no other extractor of this module accepts url.

        The regex of this extractor is too permissive and would also match
        URLs that another youtube extractor can handle, so every other
        module-level name ending in 'IE' is asked first.
        """
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a one-element list holding the playlist result for the user."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        username = mobj.group(1)

        # The API limits the result size per query (currently 50 videos),
        # so the uploads are fetched page by page until a page comes back
        # without entries or not completely filled.
        page_size = self._GDATA_PAGE_SIZE
        video_ids = []
        pagenum = 0
        while True:
            start_index = pagenum * page_size + 1
            page = self._download_webpage(
                self._GDATA_URL % (username, page_size, start_index), username,
                u'Downloading video ids from %d to %d' % (start_index, start_index + page_size))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # The number of videos is a multiple of the page size
                break

            # The video id is the last path component of the entry id.
            ids_in_page = [entry['id']['$t'].split('/')[-1] for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not "full" must be the last one, so there is
            # no need to query again.
            if len(ids_in_page) < page_size:
                break
            pagenum += 1

        url_results = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in video_ids]
        return [self.playlist_result(url_results, playlist_title = username)]
b05654f0
PH
1633
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the gdata videos feed (50 results per page)."""
    IE_DESC = u'YouTube.com searches'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _MAX_RESULTS = 1000
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        message = u'[youtube] query "%s": Downloading page %s' % (query, pagenum)
        self._downloader.to_screen(message)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        video_ids = []
        limit = n
        pagenum = 0

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            page_ids = [video['id'] for video in api_response['items']]
            video_ids.extend(page_ids)

            # Never request more than the API says exists.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may have brought more ids than were asked for.
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in video_ids]
        return self.playlist_result(videos, query)
75dff0ee
JMF
1675
1676
class YoutubeShowIE(InfoExtractor):
    """Extractor for show pages: yields one playlist URL result per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    IE_NAME = u'youtube:show'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Return a list of URL results, one for each season playlist link
        found on the show page."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        results = []
        for path in season_paths:
            results.append(self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist'))
        return results
04cc9617
JMF
1690
1691
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # The returned URL keeps a single '%s' placeholder for the paging offset.
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Page through the feed until the response reports no further
        paging and return all found videos as one playlist result."""
        feed_entries = []
        page_index = 0
        while True:
            paging = page_index * self._PAGING_STEP
            raw = self._download_webpage(self._FEED_TEMPLATE % paging,
                                         u'%s feed' % self._FEED_NAME,
                                         u'Downloading page %s' % page_index)
            info = json.loads(raw)
            feed_html = info['feed_html']
            # Unique video ids, in order of first appearance.
            video_ids = orderedSet(re.findall(r'"/watch\?v=(.*?)["&]', feed_html))
            for video_id in video_ids:
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            if info['paging'] is None:
                break
            page_index += 1
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1733
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1739
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
c626a3d9 1745
43ba5456
JMF
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the personal 'watch_later' feed."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # This feed is loaded with action_load_personal_feed, 100 entries per step.
    _PERSONAL_FEED = True
    _PAGING_STEP = 100
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
c626a3d9
JMF
1753
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list: finds the backing playlist id on
    the my_favorites page and hands it over to the playlist extractor."""
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    IE_NAME = u'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Return a URL result pointing the playlist extractor at the
        favourites playlist id."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')