]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
Restore accidentally deleted commits
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
e0df6211
PH
3import collections
4import itertools
5import io
c5e8d7af
PH
6import json
7import netrc
8import re
9import socket
e0df6211
PH
10import string
11import struct
12import traceback
13import zlib
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
54d39d8b 16from .subtitles import SubtitlesInfoExtractor
c5e8d7af
PH
17from ..utils import (
18 compat_http_client,
19 compat_parse_qs,
20 compat_urllib_error,
21 compat_urllib_parse,
22 compat_urllib_request,
23 compat_str,
24
25 clean_html,
26 get_element_by_id,
27 ExtractorError,
28 unescapeHTML,
29 unified_strdate,
04cc9617 30 orderedSet,
c5e8d7af
PH
31)
32
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors

    _real_initialize() runs, in order, the language request, the optional
    account login and the age confirmation; each later step is skipped when
    an earlier one reports failure.
    """
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request _LANG_URL (it carries the hl=en / persist_hl=1 parameters).

        Returns True when the request succeeded.  On a network error a
        warning is reported through the downloader and False is returned.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in with the credentials returned by _get_login_info().

        Returns True when the login form was submitted and the response
        does not contain the login form again.  Returns False when no
        username is configured, when a request fails, or when the
        credentials are rejected (a warning is reported in the latter
        two cases).

        Raises ExtractorError when no username is available and
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Hidden form fields that have to be echoed back; either may be
        # missing from the page, in which case the value stays None.
        galx = None
        dsh = None
        match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            galx = match.group(1)
        match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'dsh': dsh,
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode.  Fields whose value could not be scraped from
        # the login page (None) are left out instead of crashing on
        # None.encode().
        login_form = dict(
            (k.encode('utf-8'), v.encode('utf-8'))
            for k, v in login_form_strs.items()
            if v is not None)
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Getting the login form back means the credentials were refused
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """POST the age confirmation form to _AGE_URL.

        Returns True on success; raises ExtractorError when the request
        fails.
        """
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        # The request body has to be bytes on Python 3, same as in _login()
        age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
        request = compat_urllib_request.Request(self._AGE_URL, age_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set the language, log in and confirm age, stopping at the first step that fails."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
c5e8d7af 139
8377574c 140
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    # Extractor for single YouTube videos.  The tables below are keyed by
    # YouTube format codes written as strings (the last table calls them
    # "itags").
    IE_DESC = u'YouTube.com'
    # Verbose-mode regex (it is matched with re.VERBOSE in suitable()).
    # Group 1 is the optional URL prefix, group 2 the 11 character video ID.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the value of a next_url query parameter
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same codes as above in a different order: within each group the webm
    # codes come before the mp4 code of the same size (see _video_extensions)
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container name -> format codes using that container
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # Format code -> file extension (codes without an entry: '5', '6', '34', '35')
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # Format code -> human readable size label.  The values are free-form
    # strings: some are 'AxB' pairs, some a '<n>p' label, the DASH audio
    # codes carry a '<n>k' label, and '???' marks an unknown size.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # Format code -> extra label for the 3D and DASH formats
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }

    IE_NAME = u'youtube'
    # Test fixtures: each entry names a URL, the expected output file and
    # the expected metadata.  A description starting with 'md5:' presumably
    # holds a checksum of the text rather than the text itself - confirm
    # against the test runner.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
            u'info_dict': {
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]
393
c5e8d7af
PH
394
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs claimed by YoutubePlaylistIE are always refused; anything else
    is accepted when it matches _VALID_URL (a verbose-mode regex).
    """
    if YoutubePlaylistIE.suitable(url):
        return False
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    return matched is not None
400
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initialisers and create an
    empty cache of signature functions, keyed by player URL."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._jsplayer_cache = dict()
404
c5e8d7af
PH
def report_video_webpage_download(self, video_id):
    """Announce that the webpage of *video_id* is being downloaded."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
408
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage of *video_id* is being downloaded."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
412
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Announce that information about *video_id* is being extracted."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
416
def report_unavailable_format(self, video_id, format):
    """Announce that *format* is not available for *video_id*."""
    details = (video_id, format)
    self.to_screen(u'%s: Format %s not available' % details)
420
def report_rtmp_download(self):
    """Announce that an RTMP download was detected."""
    message = u'RTMP download detected'
    self.to_screen(message)
424
e0df6211
PH
425 def _extract_signature_function(self, video_id, player_url):
426 id_m = re.match(r'.*-(?P<id>[^.]+)\.(?P<ext>[^.]+)$', player_url)
427 player_type = id_m.group('ext')
428 player_id = id_m.group('id')
429
430 if player_type == 'js':
431 code = self._download_webpage(
432 player_url, video_id,
433 note=u'Downloading %s player %s' % (player_type, jsplayer_id),
434 errnote=u'Download of %s failed' % player_url)
435 return self._parse_sig_js(code)
436 elif player_tpye == 'swf':
437 urlh = self._request_webpage(
438 player_url, video_id,
439 note=u'Downloading %s player %s' % (player_type, jsplayer_id),
440 errnote=u'Download of %s failed' % player_url)
441 code = urlh.read()
442 return self._parse_sig_swf(code)
443 else:
444 assert False, 'Invalid player type %r' % player_type
445
446 def _parse_sig_js(self, jscode):
447 funcname = self._search_regex(
448 r'signature=([a-zA-Z]+)', jscode,
449 u'Initial JS player signature function name')
450
451 functions = {}
452
453 def argidx(varname):
454 return string.lowercase.index(varname)
455
456 def interpret_statement(stmt, local_vars, allow_recursion=20):
457 if allow_recursion < 0:
458 raise ExctractorError(u'Recursion limit reached')
459
460 if stmt.startswith(u'var '):
461 stmt = stmt[len(u'var '):]
462 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
463 r'=(?P<expr>.*)$', stmt)
464 if ass_m:
465 if ass_m.groupdict().get('index'):
466 def assign(val):
467 lvar = local_vars[ass_m.group('out')]
468 idx = interpret_expression(ass_m.group('index'),
469 local_vars, allow_recursion)
470 assert isinstance(idx, int)
471 lvar[idx] = val
472 return val
473 expr = ass_m.group('expr')
474 else:
475 def assign(val):
476 local_vars[ass_m.group('out')] = val
477 return val
478 expr = ass_m.group('expr')
479 elif stmt.startswith(u'return '):
480 assign = lambda v: v
481 expr = stmt[len(u'return '):]
482 else:
483 raise ExtractorError(
484 u'Cannot determine left side of statement in %r' % stmt)
485
486 v = interpret_expression(expr, local_vars, allow_recursion)
487 return assign(v)
488
489 def interpret_expression(expr, local_vars, allow_recursion):
490 if expr.isdigit():
491 return int(expr)
492
493 if expr.isalpha():
494 return local_vars[expr]
495
496 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
497 if m:
498 member = m.group('member')
499 val = local_vars[m.group('in')]
500 if member == 'split("")':
501 return list(val)
502 if member == 'join("")':
503 return u''.join(val)
504 if member == 'length':
505 return len(val)
506 if member == 'reverse()':
507 return val[::-1]
508 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
509 if slice_m:
510 idx = interpret_expression(
511 slice_m.group('idx'), local_vars, allow_recursion-1)
512 return val[idx:]
513
514 m = re.match(
515 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
516 if m:
517 val = local_vars[m.group('in')]
518 idx = interpret_expression(m.group('idx'), local_vars,
519 allow_recursion-1)
520 return val[idx]
521
522 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
523 if m:
524 a = interpret_expression(m.group('a'),
525 local_vars, allow_recursion)
526 b = interpret_expression(m.group('b'),
527 local_vars, allow_recursion)
528 return a % b
529
530 m = re.match(
531 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
532 if m:
533 fname = m.group('func')
534 if fname not in functions:
535 functions[fname] = extract_function(fname)
536 argvals = [int(v) if v.isdigit() else local_vars[v]
537 for v in m.group('args').split(',')]
538 return functions[fname](argvals)
539 raise ExtractorError(u'Unsupported JS expression %r' % expr)
540
541 def extract_function(funcname):
542 func_m = re.search(
543 r'function ' + re.escape(funcname) +
544 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
545 jscode)
546 argnames = func_m.group('args').split(',')
547
548 def resf(args):
549 local_vars = dict(zip(argnames, args))
550 for stmt in func_m.group('code').split(';'):
551 res = interpret_statement(stmt, local_vars)
552 return res
553 return resf
554
555 initial_function = extract_function(funcname)
556 return lambda s: initial_function([s])
557
558 def _parse_sig_swf(self, file_contents):
559 if file_contents[1:3] != b'WS':
560 raise ExtractorError(
561 u'Not an SWF file; header is %r' % file_contents[:3])
562 if file_contents[:1] == b'C':
563 content = zlib.decompress(file_contents[8:])
564 else:
565 raise NotImplementedError(u'Unsupported compression format %r' %
566 file_contents[:1])
567
568 def extract_tags(content):
569 pos = 0
570 while pos < len(content):
571 header16 = struct.unpack('<H', content[pos:pos+2])[0]
572 pos += 2
573 tag_code = header16 >> 6
574 tag_len = header16 & 0x3f
575 if tag_len == 0x3f:
576 tag_len = struct.unpack('<I', content[pos:pos+4])[0]
577 pos += 4
578 assert pos+tag_len <= len(content)
579 yield (tag_code, content[pos:pos+tag_len])
580 pos += tag_len
581
582 code_tag = next(tag
583 for tag_code, tag in extract_tags(content)
584 if tag_code == 82)
585 p = code_tag.index(b'\0', 4) + 1
586
587 # Parse ABC (AVM2 ByteCode)
588 def read_int(data=None, pos=None):
589 if hasattr(data, 'read'):
590 assert pos is None
591
592 res = 0
593 shift = 0
594 for _ in range(5):
595 buf = data.read(1)
596 assert len(buf) == 1
597 b = struct.unpack('<B', buf)[0]
598 res = res | ((b & 0x7f) << shift)
599 if b & 0x80 == 0:
600 break
601 shift += 7
602 return res
603
604 if data is None:
605 data = code_tag
606 if pos is None:
607 pos = p
608 res = 0
609 shift = 0
610 for _ in range(5):
611 b = struct.unpack('<B', data[pos:pos+1])[0]
612 pos += 1
613 res = res | ((b & 0x7f) << shift)
614 if b & 0x80 == 0:
615 break
616 shift += 7
617 return (res, pos)
618 assert read_int(b'\x00', 0) == (0, 1)
619 assert read_int(b'\x10', 0) == (16, 1)
620 assert read_int(b'\x34', 0) == (0x34, 1)
621 assert read_int(b'\xb4\x12', 0) == (0x12 * 0x80 + 0x34, 2)
622 assert read_int(b'\xff\xff\xff\x00', 0) == (0x1fffff, 4)
623
624 def u30(*args, **kwargs):
625 res = read_int(*args, **kwargs)
626 if isinstance(res, tuple):
627 assert res[0] & 0xf0000000 == 0
628 else:
629 assert res & 0xf0000000 == 0
630 return res
631 u32 = read_int
632
633 def s32(data=None, pos=None):
634 v, pos = read_int(data, pos)
635 if v & 0x80000000 != 0:
636 v = - ((v ^ 0xffffffff) + 1)
637 return (v, pos)
638 assert s32(b'\xff\xff\xff\xff\x0f', 0) == (-1, 5)
639
640 def string():
641 slen, p = u30()
642 return (code_tag[p:p+slen].decode('utf-8'), p + slen)
643
644 def read_byte(data=None, pos=None):
645 if data is None:
646 data = code_tag
647 if pos is None:
648 pos = p
649 res = struct.unpack('<B', data[pos:pos+1])[0]
650 return (res, pos + 1)
651
652 # minor_version + major_version
653 p += 2 + 2
654
655 # Constant pool
656 int_count, p = u30()
657 for _c in range(1, int_count):
658 _, p = s32()
659 uint_count, p = u30()
660 for _c in range(1, uint_count):
661 _, p = u32()
662 double_count, p = u30()
663 p += (double_count-1) * 8
664 string_count, p = u30()
665 constant_strings = [u'']
666 for _c in range(1, string_count):
667 s, p = string()
668 constant_strings.append(s)
669 namespace_count, p = u30()
670 for _c in range(1, namespace_count):
671 p += 1 # kind
672 _, p = u30() # name
673 ns_set_count, p = u30()
674 for _c in range(1, ns_set_count):
675 count, p = u30()
676 for _c2 in range(count):
677 _, p = u30()
678 multiname_count, p = u30()
679 MULTINAME_SIZES = {
680 0x07: 2, # QName
681 0x0d: 2, # QNameA
682 0x0f: 1, # RTQName
683 0x10: 1, # RTQNameA
684 0x11: 0, # RTQNameL
685 0x12: 0, # RTQNameLA
686 0x09: 2, # Multiname
687 0x0e: 2, # MultinameA
688 0x1b: 1, # MultinameL
689 0x1c: 1, # MultinameLA
690 }
691 multinames = [u'']
692 for _c in range(1, multiname_count):
693 kind, p = u30()
694 assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
695 if kind == 0x07:
696 namespace_idx, p = u30()
697 name_idx, p = u30()
698 multinames.append(constant_strings[name_idx])
699 else:
700 multinames.append('[MULTINAME kind: %d]' % kind)
701 for _c2 in range(MULTINAME_SIZES[kind]):
702 _, p = u30()
703
704 # Methods
705 method_count, p = u30()
706 MethodInfo = collections.namedtuple(
707 'MethodInfo',
708 ['NEED_ARGUMENTS', 'NEED_REST'])
709 method_infos = []
710 for method_id in range(method_count):
711 param_count, p = u30()
712 _, p = u30() # return type
713 for _ in range(param_count):
714 _, p = u30() # param type
715 _, p = u30() # name index (always 0 for youtube)
716 flags, p = read_byte()
717 if flags & 0x08 != 0:
718 # Options present
719 option_count, p = u30()
720 for c in range(option_count):
721 _, p = u30() # val
722 p += 1 # kind
723 if flags & 0x80 != 0:
724 # Param names present
725 for _ in range(param_count):
726 _, p = u30() # param name
727 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
728 method_infos.append(mi)
729
730 # Metadata
731 metadata_count, p = u30()
732 for _c in range(metadata_count):
733 _, p = u30() # name
734 item_count, p = u30()
735 for _c2 in range(item_count):
736 _, p = u30() # key
737 _, p = u30() # value
738
739 def parse_traits_info(pos=None):
740 if pos is None:
741 pos = p
742 trait_name_idx, pos = u30(pos=pos)
743 kind_full, pos = read_byte(pos=pos)
744 kind = kind_full & 0x0f
745 attrs = kind_full >> 4
746 methods = {}
747 if kind in [0x00, 0x06]: # Slot or Const
748 _, pos = u30(pos=pos) # Slot id
749 type_name_idx, pos = u30(pos=pos)
750 vindex, pos = u30(pos=pos)
751 if vindex != 0:
752 _, pos = read_byte(pos=pos) # vkind
753 elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
754 _, pos = u30(pos=pos) # disp_id
755 method_idx, pos = u30(pos=pos)
756 methods[multinames[trait_name_idx]] = method_idx
757 elif kind == 0x04: # Class
758 _, pos = u30(pos=pos) # slot_id
759 _, pos = u30(pos=pos) # classi
760 elif kind == 0x05: # Function
761 _, pos = u30(pos=pos) # slot_id
762 function_idx, pos = u30(pos=pos)
763 methods[function_idx] = multinames[trait_name_idx]
764 else:
765 raise ExtractorError(u'Unsupported trait kind %d' % kind)
766
767 if attrs & 0x4 != 0: # Metadata present
768 metadata_count, pos = u30(pos=pos)
769 for _c3 in range(metadata_count):
770 _, pos = u30(pos=pos)
771
772 return (methods, pos)
773
774 # Classes
775 TARGET_CLASSNAME = u'SignatureDecipher'
776 searched_idx = multinames.index(TARGET_CLASSNAME)
777 searched_class_id = None
778 class_count, p = u30()
779 for class_id in range(class_count):
780 name_idx, p = u30()
781 if name_idx == searched_idx:
782 # We found the class we're looking for!
783 searched_class_id = class_id
784 _, p = u30() # super_name idx
785 flags, p = read_byte()
786 if flags & 0x08 != 0: # Protected namespace is present
787 protected_ns_idx, p = u30()
788 intrf_count, p = u30()
789 for _c2 in range(intrf_count):
790 _, p = u30()
791 _, p = u30() # iinit
792 trait_count, p = u30()
793 for _c2 in range(trait_count):
794 _, p = parse_traits_info()
795
796 if searched_class_id is None:
797 raise ExtractorError(u'Target class %r not found' %
798 TARGET_CLASSNAME)
799
800 method_names = {}
801 method_idxs = {}
802 for class_id in range(class_count):
803 _, p = u30() # cinit
804 trait_count, p = u30()
805 for _c2 in range(trait_count):
806 trait_methods, p = parse_traits_info()
807 if class_id == searched_class_id:
808 method_names.update(trait_methods.items())
809 method_idxs.update(dict(
810 (idx, name)
811 for name, idx in trait_methods.items()))
812
813 # Scripts
814 script_count, p = u30()
815 for _c in range(script_count):
816 _, p = u30() # init
817 trait_count, p = u30()
818 for _c2 in range(trait_count):
819 _, p = parse_traits_info()
820
821 # Method bodies
822 method_body_count, p = u30()
823 Method = collections.namedtuple('Method', ['code', 'local_count'])
824 methods = {}
825 for _c in range(method_body_count):
826 method_idx, p = u30()
827 max_stack, p = u30()
828 local_count, p = u30()
829 init_scope_depth, p = u30()
830 max_scope_depth, p = u30()
831 code_length, p = u30()
832 if method_idx in method_idxs:
833 m = Method(code_tag[p:p+code_length], local_count)
834 methods[method_idxs[method_idx]] = m
835 p += code_length
836 exception_count, p = u30()
837 for _c2 in range(exception_count):
838 _, p = u30() # from
839 _, p = u30() # to
840 _, p = u30() # target
841 _, p = u30() # exc_type
842 _, p = u30() # var_name
843 trait_count, p = u30()
844 for _c2 in range(trait_count):
845 _, p = parse_traits_info()
846
847 assert p == len(code_tag)
848 assert len(methods) == len(method_idxs)
849
850 method_pyfunctions = {}
851
852 def extract_function(func_name):
853 if func_name in method_pyfunctions:
854 return method_pyfunctions[func_name]
855 if func_name not in methods:
856 raise ExtractorError(u'Cannot find function %r' % func_name)
857 m = methods[func_name]
858
859 def resfunc(args):
860 print('Entering function %s(%r)' % (func_name, args))
861 registers = ['(this)'] + list(args) + [None] * m.local_count
862 stack = []
863 coder = io.BytesIO(m.code)
864 while True:
865 opcode = struct.unpack('!B', coder.read(1))[0]
866 if opcode == 208: # getlocal_0
867 stack.append(registers[0])
868 elif opcode == 209: # getlocal_1
869 stack.append(registers[1])
870 elif opcode == 210: # getlocal_2
871 stack.append(registers[2])
872 elif opcode == 36: # pushbyte
873 v = struct.unpack('!B', coder.read(1))[0]
874 stack.append(v)
875 elif opcode == 44: # pushstring
876 idx = u30(coder)
877 stack.append(constant_strings[idx])
878 elif opcode == 48: # pushscope
879 # We don't implement the scope register, so we'll just
880 # ignore the popped value
881 stack.pop()
882 elif opcode == 70: # callproperty
883 index = u30(coder)
884 mname = multinames[index]
885 arg_count = u30(coder)
886 args = list(reversed(
887 [stack.pop() for _ in range(arg_count)]))
888 obj = stack.pop()
889 if mname == u'split':
890 assert len(args) == 1
891 assert isinstance(args[0], compat_str)
892 assert isinstance(obj, compat_str)
893 if args[0] == u'':
894 res = list(obj)
895 else:
896 res = obj.split(args[0])
897 stack.append(res)
898 elif mname in method_pyfunctions:
899 stack.append(method_pyfunctions[mname](args))
900 else:
901 raise NotImplementedError(
902 u'Unsupported property %r on %r'
903 % (mname, obj))
904 elif opcode == 93: # findpropstrict
905 index = u30(coder)
906 mname = multinames[index]
907 res = extract_function(mname)
908 stack.append(res)
909 elif opcode == 97: # setproperty
910 index = u30(coder)
911 value = stack.pop()
912 idx = stack.pop()
913 obj = stack.pop()
914 assert isinstance(obj, list)
915 assert isinstance(idx, int)
916 obj[idx] = value
917 elif opcode == 98: # getlocal
918 index = u30(coder)
919 stack.append(registers[index])
920 elif opcode == 99: # setlocal
921 index = u30(coder)
922 value = stack.pop()
923 registers[index] = value
924 elif opcode == 102: # getproperty
925 index = u30(coder)
926 pname = multinames[index]
927 if pname == u'length':
928 obj = stack.pop()
929 assert isinstance(obj, list)
930 stack.append(len(obj))
931 else: # Assume attribute access
932 idx = stack.pop()
933 assert isinstance(idx, int)
934 obj = stack.pop()
935 assert isinstance(obj, list)
936 stack.append(obj[idx])
937 elif opcode == 128: # coerce
938 _ = u30(coder)
939 elif opcode == 133: # coerce_s
940 assert isinstance(stack[-1], (type(None), compat_str))
941 elif opcode == 164: # modulo
942 value2 = stack.pop()
943 value1 = stack.pop()
944 res = value1 % value2
945 stack.append(res)
946 elif opcode == 214: # setlocal_2
947 registers[2] = stack.pop()
948 elif opcode == 215: # setlocal_3
949 registers[3] = stack.pop()
950 else:
951 raise NotImplementedError(
952 u'Unsupported opcode %d' % opcode)
953
954 method_pyfunctions[func_name] = resfunc
955 return resfunc
956
957 initial_function = extract_function(u'decipher')
958 return lambda s: initial_function([s])
959
960 def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False):
257a2501 961 """Turn the encrypted s field into a working signature"""
6b37f0be 962
e0df6211
PH
963 if jsplayer_url is not None:
964 try:
965 if jsplayer_url not in self._jsplayer_cache:
966 self._jsplayer_cache[jsplayer_url] = self._extract_signature_function(
967 video_id, jsplayer_url
968 )
969 return self._jsplayer_cache[jsplayer_url]([s])
970 except Exception as e:
971 tb = traceback.format_exc()
972 self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb)
973
974 self._downloader.report_warning(u'Warning: Falling back to static signature algorithm')
975
976 if age_gate:
977 # The videos with age protection use another player, so the
978 # algorithms can be different.
979 if len(s) == 86:
980 return s[2:63] + s[82] + s[64:82] + s[63]
981
982 if len(s) == 92:
444b1165
JMF
983 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
984 elif len(s) == 90:
985 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
8a9d86a2 986 elif len(s) == 89:
987 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
444b1165 988 elif len(s) == 88:
3e223834 989 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
be547e1d 990 elif len(s) == 87:
3a725669 991 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
be547e1d 992 elif len(s) == 86:
1cf911bc 993 return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
be547e1d 994 elif len(s) == 85:
6ae8ee3f 995 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
be547e1d 996 elif len(s) == 84:
23b00bc0 997 return s[81:36:-1] + s[0] + s[35:2:-1]
be547e1d 998 elif len(s) == 83:
e1842025 999 return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
be547e1d 1000 elif len(s) == 82:
ce85f022 1001 return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
be547e1d 1002 elif len(s) == 81:
aedd6bb9 1003 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
066090dd
JMF
1004 elif len(s) == 80:
1005 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
5c468ca8
JMF
1006 elif len(s) == 79:
1007 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
be547e1d
PH
1008
1009 else:
1010 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
c5e8d7af 1011
75952c6e
JMF
1012 def _decrypt_signature_age_gate(self, s):
1013 # The videos with age protection use another player, so the algorithms
1014 # can be different.
1015 if len(s) == 86:
1016 return s[2:63] + s[82] + s[64:82] + s[63]
1017 else:
1018 # Fallback to the other algortihms
b072a9de 1019 return self._decrypt_signature(s)
c5e8d7af 1020
def _get_available_subtitles(self, video_id):
    """Return the manually created subtitles as ``{lang_code: url}``.

    The list of tracks is downloaded from the timedtext service. An empty
    dict is returned, after reporting a warning, when the list cannot be
    downloaded or contains no track.
    """
    try:
        sub_list = self._download_webpage(
            'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            u'unable to download video subtitles: %s' % compat_str(err))
        return {}

    sub_lang_list = {}
    # Each match is a (track name, language code) pair; only the code is used.
    for _track_name, lang in re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list):
        query = compat_urllib_parse.urlencode({
            'lang': lang,
            'v': video_id,
            'fmt': self._downloader.params.get('subtitlesformat'),
        })
        sub_lang_list[lang] = u'http://www.youtube.com/api/timedtext?' + query

    if sub_lang_list:
        return sub_lang_list
    self._downloader.report_warning(u'video doesn\'t have subtitles')
    return {}
1045
055e6f36 1046 def _get_available_automatic_caption(self, video_id, webpage):
de7f3446
JMF
1047 """We need the webpage for getting the captions url, pass it as an
1048 argument to speed up the process."""
de7f3446
JMF
1049 sub_format = self._downloader.params.get('subtitlesformat')
1050 self.to_screen(u'%s: Looking for automatic captions' % video_id)
1051 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
055e6f36 1052 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
de7f3446
JMF
1053 if mobj is None:
1054 self._downloader.report_warning(err_msg)
1055 return {}
1056 player_config = json.loads(mobj.group(1))
1057 try:
1058 args = player_config[u'args']
1059 caption_url = args[u'ttsurl']
1060 timestamp = args[u'timestamp']
055e6f36
JMF
1061 # We get the available subtitles
1062 list_params = compat_urllib_parse.urlencode({
1063 'type': 'list',
1064 'tlangs': 1,
1065 'asrs': 1,
de7f3446 1066 })
055e6f36
JMF
1067 list_url = caption_url + '&' + list_params
1068 list_page = self._download_webpage(list_url, video_id)
1069 caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
e3dc22ca
JMF
1070 original_lang_node = caption_list.find('track')
1071 if original_lang_node.attrib.get('kind') != 'asr' :
1072 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1073 return {}
1074 original_lang = original_lang_node.attrib['lang_code']
055e6f36
JMF
1075
1076 sub_lang_list = {}
1077 for lang_node in caption_list.findall('target'):
1078 sub_lang = lang_node.attrib['lang_code']
1079 params = compat_urllib_parse.urlencode({
1080 'lang': original_lang,
1081 'tlang': sub_lang,
1082 'fmt': sub_format,
1083 'ts': timestamp,
1084 'kind': 'asr',
1085 })
1086 sub_lang_list[sub_lang] = caption_url + '&' + params
1087 return sub_lang_list
de7f3446
JMF
1088 # An extractor error can be raise by the download process if there are
1089 # no automatic captions but there are subtitles
1090 except (KeyError, ExtractorError):
1091 self._downloader.report_warning(err_msg)
1092 return {}
1093
c5e8d7af
PH
1094 def _print_formats(self, formats):
1095 print('Available formats:')
1096 for x in formats:
03cc7c20
JMF
1097 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1098 self._video_dimensions.get(x, '???'),
836a086c 1099 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
c5e8d7af
PH
1100
1101 def _extract_id(self, url):
1102 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1103 if mobj is None:
1104 raise ExtractorError(u'Invalid URL: %s' % url)
1105 video_id = mobj.group(2)
1106 return video_id
1107
1d043b93
JMF
1108 def _get_video_url_list(self, url_map):
1109 """
1110 Transform a dictionary in the format {itag:url} to a list of (itag, url)
1111 with the requested formats.
1112 """
1113 req_format = self._downloader.params.get('format', None)
1114 format_limit = self._downloader.params.get('format_limit', None)
1115 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1116 if format_limit is not None and format_limit in available_formats:
1117 format_list = available_formats[available_formats.index(format_limit):]
1118 else:
1119 format_list = available_formats
1120 existing_formats = [x for x in format_list if x in url_map]
1121 if len(existing_formats) == 0:
1122 raise ExtractorError(u'no known formats available for video')
1123 if self._downloader.params.get('listformats', None):
1124 self._print_formats(existing_formats)
1125 return
1126 if req_format is None or req_format == 'best':
1127 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1128 elif req_format == 'worst':
1129 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1130 elif req_format in ('-1', 'all'):
1131 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1132 else:
1133 # Specific formats. We pick the first in a slash-delimeted sequence.
bdc6b3fc
AZ
1134 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1135 # available in the specified format. For example,
1136 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1137 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1138 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1d043b93
JMF
1139 req_formats = req_format.split('/')
1140 video_url_list = None
1141 for rf in req_formats:
1142 if rf in url_map:
1143 video_url_list = [(rf, url_map[rf])]
1144 break
bdc6b3fc
AZ
1145 if rf in self._video_formats_map:
1146 for srf in self._video_formats_map[rf]:
1147 if srf in url_map:
1148 video_url_list = [(srf, url_map[srf])]
1149 break
1150 else:
1151 continue
1152 break
1d043b93
JMF
1153 if video_url_list is None:
1154 raise ExtractorError(u'requested format not available')
1155 return video_url_list
1156
1157 def _extract_from_m3u8(self, manifest_url, video_id):
1158 url_map = {}
1159 def _get_urls(_manifest):
1160 lines = _manifest.split('\n')
1161 urls = filter(lambda l: l and not l.startswith('#'),
1162 lines)
1163 return urls
1164 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1165 formats_urls = _get_urls(manifest)
1166 for format_url in formats_urls:
890f62e8 1167 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1d043b93
JMF
1168 url_map[itag] = format_url
1169 return url_map
1170
def _real_extract(self, url):
    """Extract the metadata and the download URLs of a single video.

    Downloads the watch page and the get_video_info data, gathers the
    uploader, title, thumbnail, upload date, description, subtitles and
    duration, then builds the map of available formats, decrypting
    signatures where needed.

    Returns a list with one info dict per selected format, or None when
    only a listing (subtitles or formats) was requested.

    Raises ExtractorError when the watch page cannot be downloaded, when
    required fields are missing from the video info, or when no usable
    format information is found.
    """
    if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
        self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)

    # Get video webpage
    self.report_video_webpage_download(video_id)
    url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
    request = compat_urllib_request.Request(url)
    try:
        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

    video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Drop the backslashes that escape the URL inside the page
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    # Get video info
    self.report_video_info_webpage_download(video_id)
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        self.report_age_confirmation()
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        data = compat_urllib_parse.urlencode({
            'video_id': video_id,
            'el': 'embedded',
            'gl': 'US',
            'hl': 'en',
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'asv': 3,
            'sts': '1588',
        })
        video_info_url = 'https://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note=False,
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
    else:
        age_gate = False
        # Try the different 'el' values until one yields a token
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                              % (video_id, el_type))
            video_info_webpage = self._download_webpage(
                video_info_url, video_id,
                note=False,
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            if 'token' in video_info:
                break
    if 'token' not in video_info:
        if 'reason' in video_info:
            raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
        else:
            raise ExtractorError(u'"token" parameter not in video info for unknown reason')

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError(u'"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError(u'Unable to extract uploader name')
    video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group(1)
    else:
        self._downloader.report_warning(u'unable to extract uploader nickname')

    # title
    if 'title' not in video_info:
        raise ExtractorError(u'Unable to extract video title')
    video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        # don't panic if we can't find it
        self._downloader.report_warning(u'unable to extract video thumbnail')
        video_thumbnail = ''
    else:
        video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = None
    mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
    if mobj is not None:
        # Normalize separators and whitespace before parsing the date
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = u''

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)

    if self._downloader.params.get('listsubtitles', False):
        self._list_available_subtitles(video_id, video_webpage)
        return

    if 'length_seconds' not in video_info:
        self._downloader.report_warning(u'unable to extract video duration')
        video_duration = ''
    else:
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

    # Decide which formats to download

    try:
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
        if not mobj:
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        # A player config without a stream map must not abort the
        # extraction with a KeyError: keep the data from video_info so
        # that the branches below (including 'hlsvp') can still be used.
        if 'args' not in info or 'url_encoded_fmt_stream_map' not in info['args']:
            raise ValueError('No stream_map present')  # caught below
        args = info['args']
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
        if m_s is not None:
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
        if m_s is not None:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
            else:
                video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
            else:
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
    except ValueError:
        # No usable player config: rely on video_info alone
        pass

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        video_url_list = [(None, video_info['conn'][0])]
    elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
        if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        url_map = {}
        for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' in url_data and 'url' in url_data:
                url = url_data['url'][0]
                if 'sig' in url_data:
                    # Plain signature, usable as it is
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    # Encrypted signature, has to be decrypted first
                    encrypted_sig = url_data['s'][0]
                    if self._downloader.params.get('verbose'):
                        if age_gate:
                            player_version = self._search_regex(
                                r'-(.+)\.swf$',
                                player_url if player_url else 'NOT FOUND',
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            player_desc = u'html5 player %s' % player_version

                        parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                        self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                       (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                    if age_gate:
                        jsplayer_url = None
                    else:
                        jsplayer_url_json = self._search_regex(
                            r'"assets":.+?"js":\s*("[^"]+")',
                            video_webpage, u'JS player URL')
                        jsplayer_url = json.loads(jsplayer_url_json)

                    signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[url_data['itag'][0]] = url
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
            return
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
            return

    else:
        raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

    results = []
    for format_param, video_real_url in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')

        video_format = '{0} - {1}{2}'.format(
            format_param if format_param else video_extension,
            self._video_dimensions.get(format_param, '???'),
            ' (' + self._special_itags[format_param] + ')' if format_param in self._special_itags else '')

        results.append({
            'id': video_id,
            'url': video_real_url,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'ext': video_extension,
            'format': video_format,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'player_url': player_url,
            'subtitles': video_subtitles,
            'duration': video_duration
        })
    return results
1413
class YoutubePlaylistIE(InfoExtractor):
    """Information extractor for YouTube playlists."""
    IE_DESC = u'YouTube.com playlists'
    # Matched in verbose mode: group 1 is the id found in a playlist URL,
    # group 2 is a bare prefixed playlist id.
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Return a one-element list holding the playlist result for ``url``.

        The playlist is read page by page from the API; entries are
        ordered by their position in the playlist.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Download playlist videos from API
        indexed_urls = []
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            api_url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(api_url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            feed = response['feed']
            playlist_title = feed['title']['$t']
            if 'entry' not in feed:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in feed['entry']:
                index = entry['yt$position']['$t']
                # Entries without a video id are skipped
                if 'media$group' not in entry:
                    continue
                media_group = entry['media$group']
                if 'yt$videoid' not in media_group:
                    continue
                indexed_urls.append((
                    index,
                    'https://www.youtube.com/watch?v=' + media_group['yt$videoid']['$t']
                ))

        # Sorting the (index, url) pairs restores the playlist order
        ordered_urls = [pair[1] for pair in sorted(indexed_urls)]
        url_results = [self.url_result(video_url, 'Youtube') for video_url in ordered_urls]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1481
1482
class YoutubeChannelIE(InfoExtractor):
    """Information extractor for YouTube channels."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker searched for in a page to decide whether more pages follow
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from ``page``, without duplicates,
        in order of first appearance."""
        ids_in_page = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id not in ids_in_page:
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        """Return a one-element list holding the playlist of the channel."""
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        # Download the first channel page and collect its video ids
        pagenum = 1
        page = self._download_webpage(
            self._TEMPLATE_URL % (channel_id, pagenum), channel_id,
            u'Downloading page #%s' % pagenum)
        video_ids = list(self.extract_videos_from_page(page))

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                ajax_url = self._MORE_PAGES_URL % (pagenum, channel_id)
                raw_page = self._download_webpage(
                    ajax_url, channel_id,
                    u'Downloading page #%s' % pagenum)
                ajax_page = json.loads(raw_page)

                video_ids.extend(self.extract_videos_from_page(ajax_page['content_html']))

                if self._MORE_PAGES_INDICATOR not in ajax_page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in video_ids]
        return [self.playlist_result(url_entries, channel_id)]
1537
1538
class YoutubeUserIE(InfoExtractor):
    """Information extractor for all the uploads of a YouTube user."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        """Return True only if no other extractor of this module accepts ``url``."""
        # Don't return True if the url can be extracted with another
        # youtube extractor: the regex is too permissive and it would
        # match their URLs as well.
        other_ies = (klass for (name, klass) in globals().items()
                     if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a one-element list holding the playlist of the user's uploads.

        Raises ExtractorError when the URL is invalid or when the API
        response is not valid JSON or lacks the 'feed' element.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            # Both bounds of the reported range are inclusive
            end_index = start_index + self._GDATA_PAGE_SIZE - 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (start_index, end_index))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            # Report a malformed answer as an extractor error instead of
            # letting a KeyError escape.
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._GDATA_PAGE_SIZE
                break

            # Extract video identifiers: the last component of the entry id
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
b05654f0
PH
1603
class YoutubeSearchIE(SearchInfoExtractor):
    """Information extractor for YouTube search queries."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        message = u'[youtube] query "%s": Downloading page %s' % (query, pagenum)
        self._downloader.to_screen(message)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        video_ids = []
        limit = n
        pagenum = 0

        # Each API page holds up to 50 results
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            start_index = (50 * pagenum) + 1
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), start_index)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            page_ids = [video['id'] for video in api_response['items']]
            video_ids.extend(page_ids)

            # Never ask for more results than the API says exist
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may have pushed the total past n
        video_ids = video_ids[:n]
        videos = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in video_ids]
        return self.playlist_result(videos, query)
75dff0ee
JMF
1645
1646
class YoutubeShowIE(InfoExtractor):
    """Information extractor for YouTube shows made of several seasons."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one url result per season playlist linked from the show page.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        # Fail with the same error as the other extractors of this file
        # instead of an AttributeError on None.
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        show_name = mobj.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
                for season in m_seasons]
04cc9617
JMF
1660
1661
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Amount added to the 'paging' query value for each further page
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template of the feed; it takes the paging value."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name, derived from the feed name."""
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Return a playlist with every video linked from the feed."""
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for page_index in itertools.count(0):
            paging = page_index * self._PAGING_STEP
            raw_info = self._download_webpage(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_index)
            info = json.loads(raw_info)
            feed_html = info['feed_html']
            id_matches = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            video_ids = orderedSet(match.group(1) for match in id_matches)
            feed_entries.extend(self.url_result(video_id, 'Youtube') for video_id in video_ids)
            # A null 'paging' value ends the loop
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1703
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1709
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
c626a3d9 1715
43ba5456
JMF
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'watch_later' feed, loaded as a personal feed."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    # Overrides of the base class defaults
    _PERSONAL_FEED = True
    _PAGING_STEP = 100
c626a3d9
JMF
1723
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Information extractor for the favourite videos of the logged-in user."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a url result pointing at the favourites playlist.

        The playlist id is read from the 'list=' parameter found in the
        my_favorites page.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')