]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[shared] Add extractor (Closes #3312)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
edf3e38e 3import errno
e0df6211 4import io
0ca96d48 5import itertools
c5e8d7af 6import json
c4417ddb 7import os.path
c5e8d7af 8import re
e0df6211 9import traceback
c5e8d7af 10
b05654f0 11from .common import InfoExtractor, SearchInfoExtractor
54d39d8b 12from .subtitles import SubtitlesInfoExtractor
2b25cb5d 13from ..jsinterp import JSInterpreter
54256267 14from ..swfinterp import SWFInterpreter
c5e8d7af 15from ..utils import (
edf3e38e 16 compat_chr,
c5e8d7af 17 compat_parse_qs,
c5e8d7af
PH
18 compat_urllib_parse,
19 compat_urllib_request,
7c61bd36 20 compat_urlparse,
c5e8d7af
PH
21 compat_str,
22
23 clean_html,
c38b1e77 24 get_cachedir,
c5e8d7af 25 get_element_by_id,
652cdaa2 26 get_element_by_attribute,
c5e8d7af 27 ExtractorError,
dd27fd17 28 int_or_none,
b7ab0590 29 PagedList,
c5e8d7af
PH
30 unescapeHTML,
31 unified_strdate,
04cc9617 32 orderedSet,
edf3e38e 33 write_json_file,
81c2f20b 34 uppercase_escape,
c5e8d7af
PH
35)
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Request _LANG_URL and report whether a non-empty page came back.

        The download is requested with fatal=False, so a failed request
        yields a falsy result here instead of raising.
        """
        return bool(self._download_webpage(
            self._LANG_URL, None,
            note=u'Setting language', errnote='unable to set language',
            fatal=False))

    def _login(self):
        """Try to log in with the configured credentials.

        Returns True when the login form was submitted and the response no
        longer contains the login form; False in every other case (no
        credentials, login page unavailable, request failed, credentials
        rejected).

        Raises ExtractorError when no username is available and
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) so that every failure path
            # of this method yields the same value.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        # Being served the login form again is taken as rejected credentials.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _confirm_age(self):
        """POST the age confirmation form to _AGE_URL; always returns True.

        This download is fatal (no fatal=False), so a failed request
        propagates whatever _download_webpage raises.
        """
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        req = compat_urllib_request.Request(
            self._AGE_URL,
            compat_urllib_parse.urlencode(age_form).encode('ascii'))

        self._download_webpage(
            req, None,
            note=u'Confirming age', errnote=u'Unable to confirm age')
        return True

    def _real_initialize(self):
        """Set the language, log in and confirm age, stopping at the first
        step that does not succeed. Does nothing without a downloader."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
c5e8d7af 128
8377574c 129
de7f3446 130class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
0f818663 131 IE_DESC = u'YouTube.com'
cb7dfeea 132 _VALID_URL = r"""(?x)^
c5e8d7af 133 (
83aa5293 134 (?:https?://|//)? # http(s):// or protocol-independent URL (optional)
cb7dfeea 135 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 136 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 137 (?:www\.)?pwnyoutube\.com/|
f7000f3a 138 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
139 tube\.majestyc\.net/|
140 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
141 (?:.*?\#/)? # handle anchor (#/) redirect urls
142 (?: # the various things that can precede the ID:
143 (?:(?:v|embed|e)/) # v/ or embed/ or e/
144 |(?: # or the v= param in all its forms
f7000f3a 145 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af
PH
146 (?:\?|\#!?) # the params delimiter ? or # or #!
147 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
148 v=
149 )
f4b05232
JMF
150 ))
151 |youtu\.be/ # just youtu.be/xxxx
b9c76aa1 152 |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 153 )
c5e8d7af 154 )? # all until now is optional -> you can pass the naked ID
8963d9c2 155 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af
PH
156 (?(1).+)? # if we found the ID, everything can follow
157 $"""
c5e8d7af 158 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26
PH
159 _formats = {
160 '5': {'ext': 'flv', 'width': 400, 'height': 240},
161 '6': {'ext': 'flv', 'width': 450, 'height': 270},
162 '13': {'ext': '3gp'},
163 '17': {'ext': '3gp', 'width': 176, 'height': 144},
164 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
165 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
166 '34': {'ext': 'flv', 'width': 640, 'height': 360},
167 '35': {'ext': 'flv', 'width': 854, 'height': 480},
168 '36': {'ext': '3gp', 'width': 320, 'height': 240},
169 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
170 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
171 '43': {'ext': 'webm', 'width': 640, 'height': 360},
172 '44': {'ext': 'webm', 'width': 854, 'height': 480},
173 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
174 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
175
1d043b93 176
86fe61c8 177 # 3d videos
43b81eb9
PH
178 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
179 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
180 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
181 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
182 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
183 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
184 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
836a086c 185
96fb5605 186 # Apple HTTP Live Streaming
43b81eb9
PH
187 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
188 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
189 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
190 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
191 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
192 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
193 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
2c62dc26
PH
194
195 # DASH mp4 video
43b81eb9
PH
196 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
197 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
198 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
199 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
200 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
201 '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
202 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
203 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
836a086c 204
f6f1fc92 205 # Dash mp4 audio
2c62dc26
PH
206 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
207 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
208 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
836a086c
AZ
209
210 # Dash webm
e75cafe9
A
211 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
212 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
213 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
214 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
215 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
216 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
217 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
218 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
219 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
220 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
221 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
222 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
223 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
3c80377b 224 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
18061bba 225 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
2c62dc26
PH
226
227 # Dash webm audio
e75cafe9
A
228 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
229 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d
PH
230
231 # RTMP (unnamed)
232 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 233 }
836a086c 234
c5e8d7af 235 IE_NAME = u'youtube'
2eb88d95
PH
236 _TESTS = [
237 {
0e853ca4
PH
238 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
239 u"file": u"BaW_jenozKc.mp4",
240 u"info_dict": {
241 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
242 u"uploader": u"Philipp Hagemeister",
243 u"uploader_id": u"phihag",
244 u"upload_date": u"20121002",
ad3bc6ac
PH
245 u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
246 u"categories": [u'Science & Technology'],
2eb88d95 247 }
0e853ca4 248 },
0e853ca4
PH
249 {
250 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
251 u"file": u"UxxajLWwzqY.mp4",
252 u"note": u"Test generic use_cipher_signature video (#897)",
253 u"info_dict": {
254 u"upload_date": u"20120506",
255 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
ba60a3eb 256 u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
45ed795c 257 u"uploader": u"Icona Pop",
0e853ca4 258 u"uploader_id": u"IconaPop"
2eb88d95 259 }
c108eb73
JMF
260 },
261 {
262 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
263 u"file": u"07FYdnEawAQ.mp4",
264 u"note": u"Test VEVO video with age protection (#956)",
265 u"info_dict": {
266 u"upload_date": u"20130703",
267 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
268 u"description": u"md5:64249768eec3bc4276236606ea996373",
269 u"uploader": u"justintimberlakeVEVO",
270 u"uploader_id": u"justintimberlakeVEVO"
271 }
272 },
fccd3771 273 {
83aa5293 274 u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
fccd3771
PH
275 u"file": u"yZIXLfi8CZQ.mp4",
276 u"note": u"Embed-only video (#1746)",
277 u"info_dict": {
278 u"upload_date": u"20120608",
279 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
280 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
281 u"uploader": u"SET India",
282 u"uploader_id": u"setindia"
283 }
284 },
dd27fd17
PH
285 {
286 u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
287 u"file": u"a9LDPn-MO4I.m4a",
288 u"note": u"256k DASH audio (format 141) via DASH manifest",
dd27fd17
PH
289 u"info_dict": {
290 u"upload_date": "20121002",
291 u"uploader_id": "8KVIDEO",
292 u"description": "No description available.",
293 u"uploader": "8KVIDEO",
294 u"title": "UHDTV TEST 8K VIDEO.mp4"
4919603f
PH
295 },
296 u"params": {
297 u"youtube_include_dash_manifest": True,
298 u"format": "141",
299 },
dd27fd17 300 },
3489b7d2
JMF
301 # DASH manifest with encrypted signature
302 {
303 u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
304 u'info_dict': {
305 u'id': u'IB3lcPjvWLA',
306 u'ext': u'm4a',
307 u'title': u'Afrojack - The Spark ft. Spree Wilson',
e00c9cf5 308 u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
3489b7d2
JMF
309 u'uploader': u'AfrojackVEVO',
310 u'uploader_id': u'AfrojackVEVO',
311 u'upload_date': u'20131011',
312 },
313 u"params": {
314 u'youtube_include_dash_manifest': True,
315 u'format': '141',
316 },
317 },
2eb88d95
PH
318 ]
319
c5e8d7af
PH
320
@classmethod
def suitable(cls, url):
    """Return True if this IE should handle *url*.

    URLs that YoutubePlaylistIE accepts are rejected here; everything
    else is decided by matching against _VALID_URL.
    """
    handled_as_playlist = YoutubePlaylistIE.suitable(url)
    if handled_as_playlist:
        return False
    matched = re.match(cls._VALID_URL, url)
    return matched is not None
c5e8d7af 326
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent constructor and start with an
    empty player cache (filled by _decrypt_signature)."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature_length) -> signature function.
    self._player_cache = dict()
e0df6211 330
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
334
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
338
def report_unavailable_format(self, video_id, format):
    """Tell the user that the given format is not available for the video."""
    message = u'%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
342
def report_rtmp_download(self):
    """Tell the user that the download will go over RTMP."""
    message = u'RTMP download detected'
    self.to_screen(message)
346
def _extract_signature_function(self, video_id, player_url, slen):
    """Return a function that deciphers signatures of length *slen*.

    The player id and type ('js' or 'swf') are taken from *player_url*.
    When a cache directory is configured, a previously stored index
    permutation for this player/length is reused; otherwise the player is
    downloaded and parsed, and the resulting permutation is written to
    the cache on a best-effort basis.

    Raises ExtractorError if the player cannot be identified from the URL
    or has an unsupported type.
    """
    id_m = re.match(
        r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%d' % (player_type, player_id, slen)
    # func_id becomes a file name below; it must not contain a path.
    assert os.path.basename(func_id) == func_id
    cache_dir = get_cachedir(self._downloader.params)

    cache_enabled = cache_dir is not None
    if cache_enabled:
        cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                u'youtube-sigfuncs',
                                func_id + '.json')
        try:
            with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                cache_spec = json.load(cachef)
            return lambda s: u''.join(s[i] for i in cache_spec)
        except (IOError, ValueError):
            # IOError: no cache available.
            # ValueError: the cache file exists but is not valid JSON
            # (e.g. truncated); ignore it and rebuild from the player so a
            # broken cache file cannot block extraction.
            pass

    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raised explicitly rather than asserted: asserts are stripped
        # under -O, which would leave `res` unbound further down.
        raise ExtractorError('Invalid player type %r' % player_type)

    if cache_enabled:
        try:
            # Run the function over a string whose characters are their own
            # indices; the code points of the output are the permutation.
            test_string = u''.join(map(compat_chr, range(slen)))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]
            try:
                os.makedirs(os.path.dirname(cache_fn))
            except OSError as ose:
                if ose.errno != errno.EEXIST:
                    raise
            write_json_file(cache_spec, cache_fn)
        except Exception:
            # Caching is best effort: warn and still return the function.
            tb = traceback.format_exc()
            self._downloader.report_warning(
                u'Writing cache to %r failed: %s' % (cache_fn, tb))

    return res
406
edf3e38e
PH
def _print_sig_code(self, func, slen):
    """Print Python source equivalent to *func* for signatures of length
    *slen*, expressed as a concatenation of indexes and slices of ``s``."""

    def describe(indices):
        # Yield one expression per single index or per run of indices
        # that ascend/descend by exactly one.
        def slice_expr(first, last, stride):
            lower = u'' if first == 0 else str(first)
            upper = (u':%d' % (last + stride)) if last + stride >= 0 else u':'
            by = u'' if stride == 1 else (u':%d' % stride)
            return u's[%s%s%s]' % (lower, upper, by)

        stride = None
        first = '(Never used)'  # Squelch pyflakes warnings - first will be
                                # set as soon as stride is set
        for cur, before in zip(indices[1:], indices[:-1]):
            delta = cur - before
            if stride is not None:
                # Inside a run: either it continues or it ends at `before`.
                if delta != stride:
                    yield slice_expr(first, before, stride)
                    stride = None
                continue
            if delta in [-1, 1]:
                stride = delta
                first = before
            else:
                yield u's[%d]' % before
        if stride is None:
            yield u's[%d]' % cur
        else:
            yield slice_expr(first, cur, stride)

    # A probe string whose characters are their own positions exposes
    # which input index ends up at each output position.
    probe = u''.join(map(compat_chr, range(slen)))
    permuted = func(probe)
    positions = [ord(ch) for ch in permuted]
    expression = u' + '.join(describe(positions))
    code = u'if len(s) == %d:\n return %s\n' % (slen, expression)
    self.to_screen(u'Extracted signature function:\n' + code)
edf3e38e 442
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player source.

    The name of the player's signature function is located with a regex
    and the function is extracted through JSInterpreter; the returned
    callable takes the signature string as its only argument.
    """
    sig_func_name = self._search_regex(
        r'signature=([$a-zA-Z]+)', jscode,
        u'Initial JS player signature function name')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(sig_func_name)

    def decipher(s):
        # The extracted function expects its arguments as a list.
        return js_function([s])
    return decipher
451
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a Flash player.

    Extracts the ``decipher`` function of the ``SignatureDecipher`` class
    through SWFInterpreter; the returned callable takes the signature
    string as its only argument.
    """
    TARGET_CLASSNAME = u'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    swf_function = interpreter.extract_function(decipher_class, u'decipher')

    def decipher(s):
        # The extracted function expects its arguments as a list.
        return swf_function([s])
    return decipher
458
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature

    Signature functions are kept in self._player_cache, keyed by
    (player_url, len(s)). Any failure while obtaining or applying the
    function is re-raised as ExtractorError carrying the traceback.
    *age_gate* is accepted but not used here.
    """
    if player_url is None:
        raise ExtractorError(u'Cannot decrypt signature without player_url')

    # Protocol-relative player URLs are fetched over https.
    if player_url.startswith(u'//'):
        player_url = u'https:' + player_url
    try:
        sig_len = len(s)
        cache_key = (player_url, sig_len)
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, sig_len)
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, sig_len)
        return decipher(s)
    except Exception as e:
        tb = traceback.format_exc()
        raise ExtractorError(
            u'Automatic signature extraction failed: ' + tb, cause=e)
e0df6211 482
def _get_available_subtitles(self, video_id, webpage):
    """Return a dict mapping language code to subtitle download URL.

    The track list is fetched from the timedtext listing endpoint. An
    empty dict is returned, with a warning, when the list cannot be
    downloaded or contains no tracks. *webpage* is not used here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        track_list = self._download_webpage(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            u'unable to download video subtitles: %s' % compat_str(err))
        return {}

    # Each match is a (track name, language code) pair.
    tracks = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', track_list)

    urls_by_lang = {}
    for track_name, lang in tracks:
        query = compat_urllib_parse.urlencode({
            'lang': lang,
            'v': video_id,
            'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
            'name': unescapeHTML(track_name).encode('utf-8'),
        })
        urls_by_lang[lang] = u'https://www.youtube.com/api/timedtext?' + query

    if urls_by_lang:
        return urls_by_lang
    self._downloader.report_warning(u'video doesn\'t have subtitles')
    return {}
508
def _get_available_automatic_caption(self, video_id, webpage):
    """Return a dict mapping language code to automatic caption URL.

    We need the webpage for getting the captions url, pass it as an
    argument to speed up the process.

    An empty dict is returned, with a warning, when the player config
    cannot be found or parsed, when it lacks the caption fields, or when
    the first track in the caption list is not an automatic ('asr') one.
    """
    sub_format = self._downloader.params.get('subtitlesformat', 'srt')
    self.to_screen(u'%s: Looking for automatic captions' % video_id)
    mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
    err_msg = u'Couldn\'t find automatic captions for %s' % video_id
    if mobj is None:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        player_config = json.loads(mobj.group(1))
    except ValueError:
        # The non-greedy capture can stop short of the real end of the
        # object, leaving text that is not valid JSON. Captions are
        # optional, so warn instead of aborting the whole extraction.
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config[u'args']
        caption_url = args[u'ttsurl']
        timestamp = args[u'timestamp']
        # We get the available subtitles
        list_params = compat_urllib_parse.urlencode({
            'type': 'list',
            'tlangs': 1,
            'asrs': 1,
        })
        list_url = caption_url + '&' + list_params
        caption_list = self._download_xml(list_url, video_id)
        original_lang_node = caption_list.find('track')
        if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
            self._downloader.report_warning(u'Video doesn\'t have automatic captions')
            return {}
        original_lang = original_lang_node.attrib['lang_code']

        sub_lang_list = {}
        # One URL per translation target of the original automatic track.
        for lang_node in caption_list.findall('target'):
            sub_lang = lang_node.attrib['lang_code']
            params = compat_urllib_parse.urlencode({
                'lang': original_lang,
                'tlang': sub_lang,
                'fmt': sub_format,
                'ts': timestamp,
                'kind': 'asr',
            })
            sub_lang_list[sub_lang] = caption_url + '&' + params
        return sub_lang_list
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
555
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video ID (second group of _VALID_URL) found in *url*.

    Raises ExtractorError when *url* does not match _VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError(u'Invalid URL: %s' % url)
563
1d043b93
JMF
def _extract_from_m3u8(self, manifest_url, video_id):
    """Download the manifest at *manifest_url* and return a dict mapping
    itag to format URL.

    Every non-empty line that does not start with '#' is treated as a
    format URL; its itag is taken from the 'itag/<digits>/' path part.
    """
    manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
    url_map = {}
    # splitlines() + strip() instead of split('\n'): playlists may use CRLF
    # line endings, and a trailing '\r' must neither end up in the URL nor
    # make an otherwise empty line look like one.
    for line in manifest.splitlines():
        format_url = line.strip()
        if not format_url or format_url.startswith('#'):
            continue
        itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
        url_map[itag] = format_url
    return url_map
577
1fb07d10
JG
def _extract_annotations(self, video_id):
    """Download and return the annotations document for *video_id*."""
    annotations_url = (
        'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s'
        % video_id)
    return self._download_webpage(
        annotations_url, video_id,
        note=u'Searching for annotations.',
        errnote=u'Unable to download video annotations.')
581
c5e8d7af 582 def _real_extract(self, url):
7e8c0af0
PH
583 proto = (
584 u'http' if self._downloader.params.get('prefer_insecure', False)
585 else u'https')
586
c5e8d7af
PH
587 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
588 mobj = re.search(self._NEXT_URL_RE, url)
589 if mobj:
7e8c0af0 590 url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
97665381 591 video_id = self.extract_id(url)
c5e8d7af
PH
592
593 # Get video webpage
7e8c0af0 594 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
336c3a69 595 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
596
597 # Attempt to extract SWF player URL
e0df6211 598 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
599 if mobj is not None:
600 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
601 else:
602 player_url = None
603
604 # Get video info
605 self.report_video_info_webpage_download(video_id)
c108eb73
JMF
606 if re.search(r'player-age-gate-content">', video_webpage) is not None:
607 self.report_age_confirmation()
608 age_gate = True
609 # We simulate the access to the video from www.youtube.com/v/{video_id}
610 # this can be viewed without login into Youtube
2c57c7fa
JMF
611 data = compat_urllib_parse.urlencode({
612 'video_id': video_id,
613 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934
JMF
614 'sts': self._search_regex(
615 r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
2c57c7fa 616 })
7e8c0af0 617 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
c5e8d7af
PH
618 video_info_webpage = self._download_webpage(video_info_url, video_id,
619 note=False,
620 errnote='unable to download video info webpage')
621 video_info = compat_parse_qs(video_info_webpage)
c108eb73
JMF
622 else:
623 age_gate = False
624 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
7e8c0af0 625 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
c108eb73
JMF
626 % (video_id, el_type))
627 video_info_webpage = self._download_webpage(video_info_url, video_id,
628 note=False,
629 errnote='unable to download video info webpage')
630 video_info = compat_parse_qs(video_info_webpage)
631 if 'token' in video_info:
632 break
c5e8d7af
PH
633 if 'token' not in video_info:
634 if 'reason' in video_info:
d11271dd
PH
635 raise ExtractorError(
636 u'YouTube said: %s' % video_info['reason'][0],
637 expected=True, video_id=video_id)
c5e8d7af 638 else:
d11271dd
PH
639 raise ExtractorError(
640 u'"token" parameter not in video info for unknown reason',
641 video_id=video_id)
c5e8d7af 642
1d699755
PH
643 if 'view_count' in video_info:
644 view_count = int(video_info['view_count'][0])
645 else:
646 view_count = None
647
c5e8d7af
PH
648 # Check for "rental" videos
649 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
650 raise ExtractorError(u'"rental" videos not supported')
651
652 # Start extracting information
653 self.report_information_extraction(video_id)
654
655 # uploader
656 if 'author' not in video_info:
657 raise ExtractorError(u'Unable to extract uploader name')
658 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
659
660 # uploader_id
661 video_uploader_id = None
662 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
663 if mobj is not None:
664 video_uploader_id = mobj.group(1)
665 else:
666 self._downloader.report_warning(u'unable to extract uploader nickname')
667
668 # title
a8c6b241 669 if 'title' in video_info:
aa92f063 670 video_title = video_info['title'][0]
a8c6b241
PH
671 else:
672 self._downloader.report_warning(u'Unable to extract video title')
673 video_title = u'_'
c5e8d7af
PH
674
675 # thumbnail image
7763b04e
JMF
676 # We try first to get a high quality image:
677 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
678 video_webpage, re.DOTALL)
679 if m_thumb is not None:
680 video_thumbnail = m_thumb.group(1)
681 elif 'thumbnail_url' not in video_info:
c5e8d7af 682 self._downloader.report_warning(u'unable to extract video thumbnail')
f490e77e 683 video_thumbnail = None
c5e8d7af
PH
684 else: # don't panic if we can't find it
685 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
686
687 # upload date
688 upload_date = None
ad3bc6ac 689 mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
beee53de
PH
690 if mobj is None:
691 mobj = re.search(
263bd4ec 692 r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
beee53de 693 video_webpage)
c5e8d7af
PH
694 if mobj is not None:
695 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
696 upload_date = unified_strdate(upload_date)
697
ec8deefc
DG
698 m_cat_container = get_element_by_id("eow-category", video_webpage)
699 if m_cat_container:
ad3bc6ac 700 category = self._html_search_regex(
01ed5c9b 701 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
702 default=None)
703 video_categories = None if category is None else [category]
704 else:
705 video_categories = None
ec8deefc 706
c5e8d7af
PH
707 # description
708 video_description = get_element_by_id("eow-description", video_webpage)
709 if video_description:
27dcce19
PH
710 video_description = re.sub(r'''(?x)
711 <a\s+
712 (?:[a-zA-Z-]+="[^"]+"\s+)*?
713 title="([^"]+)"\s+
714 (?:[a-zA-Z-]+="[^"]+"\s+)*?
715 class="yt-uix-redirect-link"\s*>
716 [^<]+
717 </a>
718 ''', r'\1', video_description)
c5e8d7af
PH
719 video_description = clean_html(video_description)
720 else:
721 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
722 if fd_mobj:
723 video_description = unescapeHTML(fd_mobj.group(1))
724 else:
725 video_description = u''
726
336c3a69 727 def _extract_count(klass):
46374a56
PH
728 count = self._search_regex(
729 r'class="%s">([\d,]+)</span>' % re.escape(klass),
730 video_webpage, klass, default=None)
336c3a69
JMF
731 if count is not None:
732 return int(count.replace(',', ''))
733 return None
734 like_count = _extract_count(u'likes-count')
735 dislike_count = _extract_count(u'dislikes-count')
736
c5e8d7af 737 # subtitles
d82134c3 738 video_subtitles = self.extract_subtitles(video_id, video_webpage)
c5e8d7af 739
c5e8d7af 740 if self._downloader.params.get('listsubtitles', False):
d665f8d3 741 self._list_available_subtitles(video_id, video_webpage)
c5e8d7af
PH
742 return
743
744 if 'length_seconds' not in video_info:
745 self._downloader.report_warning(u'unable to extract video duration')
b466b702 746 video_duration = None
c5e8d7af 747 else:
b466b702 748 video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
c5e8d7af 749
1fb07d10
JG
750 # annotations
751 video_annotations = None
752 if self._downloader.params.get('writeannotations', False):
753 video_annotations = self._extract_annotations(video_id)
754
c5e8d7af 755 # Decide which formats to download
c5e8d7af 756 try:
ae7ed920 757 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
50be92c1
PH
758 if not mobj:
759 raise ValueError('Could not find vevo ID')
ae7ed920
PH
760 json_code = uppercase_escape(mobj.group(1))
761 ytplayer_config = json.loads(json_code)
3489b7d2 762 args = ytplayer_config['args']
7ce7e394
JMF
763 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
764 # this signatures are encrypted
44d46655 765 if 'url_encoded_fmt_stream_map' not in args:
f10503db 766 raise ValueError(u'No stream_map present') # caught below
00fe14fc
JMF
767 re_signature = re.compile(r'[&,]s=')
768 m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
7ce7e394
JMF
769 if m_s is not None:
770 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
c5e8d7af 771 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
00fe14fc 772 m_s = re_signature.search(args.get('adaptive_fmts', u''))
b7a68384 773 if m_s is not None:
00fe14fc
JMF
774 if 'adaptive_fmts' in video_info:
775 video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
37b6d5f6 776 else:
00fe14fc 777 video_info['adaptive_fmts'] = [args['adaptive_fmts']]
c5e8d7af
PH
778 except ValueError:
779 pass
780
dd27fd17
PH
781 def _map_to_format_list(urlmap):
782 formats = []
783 for itag, video_real_url in urlmap.items():
784 dct = {
785 'format_id': itag,
786 'url': video_real_url,
787 'player_url': player_url,
788 }
0b65e5d4
PH
789 if itag in self._formats:
790 dct.update(self._formats[itag])
dd27fd17
PH
791 formats.append(dct)
792 return formats
793
c5e8d7af
PH
794 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
795 self.report_rtmp_download()
dd27fd17
PH
796 formats = [{
797 'format_id': '_rtmp',
798 'protocol': 'rtmp',
799 'url': video_info['conn'][0],
800 'player_url': player_url,
801 }]
00fe14fc
JMF
802 elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
803 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
804 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 805 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
c5e8d7af 806 url_map = {}
00fe14fc 807 for url_data_str in encoded_url_map.split(','):
c5e8d7af
PH
808 url_data = compat_parse_qs(url_data_str)
809 if 'itag' in url_data and 'url' in url_data:
810 url = url_data['url'][0]
811 if 'sig' in url_data:
812 url += '&signature=' + url_data['sig'][0]
813 elif 's' in url_data:
e0df6211 814 encrypted_sig = url_data['s'][0]
cf010131
PH
815
816 if not age_gate:
817 jsplayer_url_json = self._search_regex(
818 r'"assets":.+?"js":\s*("[^"]+")',
819 video_webpage, u'JS player URL')
820 player_url = json.loads(jsplayer_url_json)
821 if player_url is None:
822 player_url_json = self._search_regex(
823 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
824 video_webpage, u'age gate player URL')
825 player_url = json.loads(player_url_json)
826
769fda3c 827 if self._downloader.params.get('verbose'):
cf010131
PH
828 if player_url is None:
829 player_version = 'unknown'
830 player_desc = 'unknown'
831 else:
832 if player_url.endswith('swf'):
bdde940e 833 player_version = self._search_regex(
b8c74d60 834 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
bdde940e 835 u'flash player', fatal=False)
cf010131
PH
836 player_desc = 'flash player %s' % player_version
837 else:
838 player_version = self._search_regex(
839 r'html5player-(.+?)\.js', video_webpage,
840 'html5 player', fatal=False)
841 player_desc = u'html5 player %s' % player_version
e0df6211
PH
842
843 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
5a76c651 844 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
e0df6211
PH
845 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
846
83799698
PH
847 signature = self._decrypt_signature(
848 encrypted_sig, video_id, player_url, age_gate)
c5e8d7af
PH
849 url += '&signature=' + signature
850 if 'ratebypass' not in url:
851 url += '&ratebypass=yes'
852 url_map[url_data['itag'][0]] = url
dd27fd17 853 formats = _map_to_format_list(url_map)
1d043b93
JMF
854 elif video_info.get('hlsvp'):
855 manifest_url = video_info['hlsvp'][0]
856 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 857 formats = _map_to_format_list(url_map)
c5e8d7af 858 else:
9abb3204 859 raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 860
dd27fd17 861 # Look for the DASH manifest
d68f0cdb 862 if (self._downloader.params.get('youtube_include_dash_manifest', False)):
dd27fd17 863 try:
d68f0cdb 864 # The DASH manifest used needs to be the one from the original video_webpage.
865 # The one found in get_video_info seems to be using different signatures.
866 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
867 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
868 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
869 if age_gate:
3489b7d2 870 dash_manifest_url = video_info.get('dashmpd')[0]
d68f0cdb 871 else:
3489b7d2 872 dash_manifest_url = ytplayer_config['args']['dashmpd']
d68f0cdb 873 def decrypt_sig(mobj):
874 s = mobj.group(1)
875 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
876 return '/signature/%s' % dec_s
877 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
dd27fd17 878 dash_doc = self._download_xml(
d68f0cdb 879 dash_manifest_url, video_id,
dd27fd17
PH
880 note=u'Downloading DASH manifest',
881 errnote=u'Could not download DASH manifest')
882 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
883 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
884 if url_el is None:
885 continue
886 format_id = r.attrib['id']
887 video_url = url_el.text
888 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
889 f = {
890 'format_id': format_id,
891 'url': video_url,
892 'width': int_or_none(r.attrib.get('width')),
893 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
894 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
895 'filesize': filesize,
896 }
897 try:
898 existing_format = next(
899 fo for fo in formats
900 if fo['format_id'] == format_id)
901 except StopIteration:
902 f.update(self._formats.get(format_id, {}))
903 formats.append(f)
904 else:
905 existing_format.update(f)
906
907 except (ExtractorError, KeyError) as e:
908 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
d80044c2 909
4bcc7bd1 910 self._sort_formats(formats)
4ea3be0a 911
912 return {
913 'id': video_id,
914 'uploader': video_uploader,
915 'uploader_id': video_uploader_id,
916 'upload_date': upload_date,
917 'title': video_title,
918 'thumbnail': video_thumbnail,
919 'description': video_description,
ec8deefc 920 'categories': video_categories,
4ea3be0a 921 'subtitles': video_subtitles,
922 'duration': video_duration,
923 'age_limit': 18 if age_gate else 0,
924 'annotations': video_annotations,
7e8c0af0 925 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
4ea3be0a 926 'view_count': view_count,
927 'like_count': like_count,
928 'dislike_count': dislike_count,
929 'formats': formats,
930 }
c5e8d7af 931
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extract the videos of a YouTube playlist (including "RD" mixes)."""
    IE_NAME = u'youtube:playlist'
    IE_DESC = u'YouTube.com playlists'
    # Group 1: playlist id found inside a youtube.com URL,
    # group 2: a bare playlist id given instead of a URL.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'

    def _real_initialize(self):
        """Log in before extracting."""
        self._login()

    def _ids_to_results(self, ids):
        """Return one url_result (handled by the 'Youtube' extractor) per video id."""
        results = []
        for video_id in ids:
            results.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return results

    def _extract_mix(self, playlist_id):
        """Build the playlist result for a mix from its watch page."""
        # A mix is generated from a single video: the watch URL is built from
        # the last 11 characters of the playlist id plus the full playlist id.
        seed_video_id = playlist_id[-11:]
        mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_video_id, playlist_id)
        webpage = self._download_webpage(mix_url, playlist_id, u'Downloading Youtube mix')

        # Try the known title containers in order and keep the first non-empty
        # one (or the result of the last lookup when none matched).
        title_html = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_html = get_element_by_attribute('class', class_name, webpage)
            if title_html:
                break
        mix_title = clean_html(title_html)

        entry_re = r'''(?x)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        found_ids = re.findall(entry_re, webpage, flags=re.DOTALL)
        entries = self._ids_to_results(orderedSet(found_ids))

        return self.playlist_result(entries, playlist_id, mix_title)

    def _real_extract(self, url):
        """Return a playlist result for `url`, or a single video result
        when the URL carries a `v` parameter and 'noplaylist' is set."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        # A watch URL names a video as well as a playlist
        query_string = compat_urlparse.urlparse(url).query
        params = compat_urlparse.parse_qs(query_string)
        if 'v' in params:
            video_id = params['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            self.to_screen(
                u'Downloading playlist %s - add --no-playlist to just download video %s'
                % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(
                u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"',
                expected=True)

        first_page = self._download_webpage(self._TEMPLATE_URL % playlist_id, playlist_id)

        # Check if the playlist exists or is private
        alert = re.search(
            r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>',
            first_page)
        if alert is not None:
            raise ExtractorError(
                u'The playlist doesn\'t exist or is private, use --username or '
                u'--netrc to access it.',
                expected=True)

        # Walk the playlist: the first page is plain HTML, every further page
        # is a JSON document reached through the "load more" link.
        video_ids = []
        videos_html = first_page
        load_more_html = first_page
        for page_num in itertools.count(1):
            # Drop duplicates and the link with index 0
            # (it's not the first video of the playlist)
            page_ids = orderedSet(
                m.group('id')
                for m in re.finditer(self._VIDEO_RE, videos_html)
                if m.group('index') != '0')
            video_ids.extend(page_ids)

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', load_more_html)
            if not more_match:
                break

            more_json = self._download_json(
                'https://youtube.com/%s' % more_match.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            videos_html = more_json['content_html']
            load_more_html = more_json['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            first_page, u'title')

        return self.playlist_result(
            self._ids_to_results(video_ids), playlist_id, playlist_title)
c5e8d7af
PH
1042
1043
0a688bc0
JMF
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extract a channel's top list given as "yttoplist:{channel}:{list title}"."""
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
    # Upper bound on how often the list page is downloaded while waiting for
    # it to contain videos; prevents an endless download loop.
    _MAX_ATTEMPTS = 10

    def _real_extract(self, url):
        """Locate the top list on the channel page and return it as a playlist.

        Raises ExtractorError when the list link cannot be found on the
        channel page or when the list page never contains any video.
        """
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')

        # The list link is recognised by the urlencoded "title=<list title>"
        # query that appears inside its href.
        query = compat_urllib_parse.urlencode({'title': title})
        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(playlist_re, channel_page, u'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # Sometimes the webpage doesn't contain the videos: retry, but only a
        # bounded number of times instead of looping forever.
        for attempt in range(self._MAX_ATTEMPTS):
            msg = u'Downloading Youtube top list'
            if attempt > 0:
                msg += ', retry #%d' % attempt
            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        else:
            raise ExtractorError(
                u'Unable to find any video in top list "%s" after %d attempts'
                % (title, self._MAX_ATTEMPTS))

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_title=title)
1074
1075
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos listed on a YouTube channel."""
    IE_NAME = u'youtube:channel'
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from `page`, in order, without duplicates."""
        unique_ids = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id in unique_ids:
                continue
            unique_ids.append(video_id)
        return unique_ids

    def _real_extract(self, url):
        """Return a playlist result (id = channel id) with every video found."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = url_match.group(1)

        # Download the channel's video listing page
        videos_url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(videos_url, channel_id)
        autogenerated_label = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_label is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Page through the json-based channel_ajax query until the
            # "load more" widget is no longer offered
            video_ids = []
            for pagenum in itertools.count(1):
                ajax_url = self._MORE_PAGES_URL % (pagenum, channel_id)
                ajax_page = self._download_json(
                    ajax_url, channel_id,
                    note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                video_ids.extend(
                    self.extract_videos_from_page(ajax_page['content_html']))

                if self._MORE_PAGES_INDICATOR not in ajax_page['load_more_widget_html']:
                    break

        self._downloader.to_screen(
            u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        entries = []
        for video_id in video_ids:
            entries.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(entries, channel_id)
c5e8d7af
PH
1130
1131
class YoutubeUserIE(InfoExtractor):
    """Extract a user's uploads through the gdata uploads feed."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        """Return True only if no other extractor class in this module claims `url`."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and would match it as well.
        other_ies = (
            klass for (name, klass) in globals().items()
            if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads, titled with
        the username.

        Raises ExtractorError for a non-matching URL or when the API answers
        with invalid JSON.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            """Yield one url entry per video on the zero-based page `pagenum`."""
            # The feed's start-index is 1-based
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            # Last index covered by this page (inclusive)
            end_index = start_index + self._GDATA_PAGE_SIZE - 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, end_index))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # No entries on this page: nothing to yield
                return

            # Extract video identifiers
            for entry in response['feed']['entry']:
                title = entry['title']['$t']
                # The video id is the last path component of the entry id
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1192
b05654f0
PH
1193
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor querying the gdata videos feed."""
    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    _MAX_RESULTS = 1000
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

    def _get_n_results(self, query, n):
        """Return a playlist result holding at most `n` videos found for `query`.

        Raises ExtractorError when an API response carries no 'items'.
        """
        page_size = 50
        collected_ids = []
        wanted = n
        page_index = 0

        while page_index * page_size < wanted:
            # The API's start-index is 1-based
            start_index = page_index * page_size + 1
            api_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                start_index)
            raw_json = self._download_webpage(
                api_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (page_index + 1),
                errnote=u'Unable to download API page')
            api_response = json.loads(raw_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            collected_ids.extend(item['id'] for item in api_response['items'])

            # Never ask for more than the API says exists
            wanted = min(n, api_response['totalItems'])
            page_index += 1

        # The last page may have overshot the requested amount
        entries = [
            self.url_result(video_id, 'Youtube', video_id=video_id)
            for video_id in collected_ids[:n]]
        return self.playlist_result(entries, query)
75dff0ee 1235
c9ae7b95 1236
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE whose API query adds orderby=published."""
    IE_DESC = u'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Same feed as the parent class, with "&orderby=published" appended
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
75dff0ee 1242
c9ae7b95
PH
1243
class YoutubeSearchURLIE(InfoExtractor):
    """Extract the results shown on a youtube.com/results search page."""
    IE_NAME = u'youtube:search_url'
    IE_DESC = u'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        """Return a playlist titled with the search query, holding one url
        entry per result heading found on the page."""
        url_match = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(url_match.group('query'))

        webpage = self._download_webpage(url, query)
        results_html = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')

        def entry_from_heading(heading_html):
            # The title is optional (fatal=False), the link is mandatory
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'],
                heading_html, 'item title', fatal=False)
            item_path = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading_html, 'item URL')
            return {
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', item_path),
                'title': item_title,
            }

        headings = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', results_html)
        entries = [entry_from_heading(heading) for heading in headings]

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1278
1279
class YoutubeShowIE(InfoExtractor):
    """Extract the season playlists linked from a YouTube show page."""
    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Return a list with one YoutubePlaylist url_result per season link."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')

        # There's one playlist for each season of the show
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))

        results = []
        for season_path in season_paths:
            results.append(self.url_result(
                'https://www.youtube.com' + season_path, 'YoutubePlaylist'))
        return results
04cc9617
JMF
1293
1294
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template for this feed; the remaining %s takes the paging value."""
        action = (
            'action_load_personal_feed' if self._PERSONAL_FEED
            else 'action_load_system_feed')
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before extracting."""
        self._login()

    def _real_extract(self, url):
        """Page through the feed and return every linked video as a playlist."""
        entries = []
        next_paging = 0
        for page_num in itertools.count(1):
            feed_json = self._download_json(
                self._FEED_TEMPLATE % next_paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_num)
            # The HTML snippet is read from 'feed_html', else 'content_html'
            feed_html = feed_json.get('feed_html') or feed_json.get('content_html')

            video_ids = orderedSet(
                re.findall(r'"/watch\?v=(.*?)["&]', feed_html))
            for video_id in video_ids:
                entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))

            # The "load more" link carries the paging value of the next page;
            # its absence ends the loop.
            more_match = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                feed_html)
            if more_match is None:
                break
            next_paging = more_match.group('paging')
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)
1339
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    # Matches the feed URL or the ":ytsubs" / ":ytsubscriptions" pseudo-URL
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1345
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    # Matches the feed URL or the ":ytrec" / ":ytrecommended" pseudo-URL
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
c626a3d9 1351
43ba5456
JMF
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'watch_later' feed (loaded as a personal feed)."""
    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    # Matches the feed URL or the ":ytwatchlater" pseudo-URL
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
c626a3d9 1358
f459d170
JMF
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed (loaded as a personal feed)."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string, like the sibling extractors: "\." is a regex escape, not a
    # string escape, and a non-raw literal triggers an invalid-escape warning
    # on newer Python versions. The pattern itself is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1365
c626a3d9
JMF
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the my_favorites page to the playlist it links to."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a url_result delegating to YoutubePlaylist.

        The playlist id is the first "list=" value found on the
        my_favorites page.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
1376
1377
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their parameters and explain why."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # First alternative: a /watch? URL with nothing, only a feature= value or
    # only an annotation_id= value after the "?"; second alternative: an
    # attribution_link URL cut off after its "a" parameter.
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+
        )?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at a missing shell quote."""
        hint = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like youtube-dl '
            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            u' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)