]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
Merge branch 'naglis-izlesene'
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
edf3e38e 3import errno
e0df6211 4import io
0ca96d48 5import itertools
c5e8d7af 6import json
c4417ddb 7import os.path
c5e8d7af 8import re
e0df6211 9import traceback
c5e8d7af 10
b05654f0 11from .common import InfoExtractor, SearchInfoExtractor
54d39d8b 12from .subtitles import SubtitlesInfoExtractor
2b25cb5d 13from ..jsinterp import JSInterpreter
54256267 14from ..swfinterp import SWFInterpreter
c5e8d7af 15from ..utils import (
edf3e38e 16 compat_chr,
c5e8d7af 17 compat_parse_qs,
c5e8d7af
PH
18 compat_urllib_parse,
19 compat_urllib_request,
7c61bd36 20 compat_urlparse,
c5e8d7af
PH
21 compat_str,
22
23 clean_html,
c38b1e77 24 get_cachedir,
c5e8d7af 25 get_element_by_id,
652cdaa2 26 get_element_by_attribute,
c5e8d7af 27 ExtractorError,
dd27fd17 28 int_or_none,
b7ab0590 29 PagedList,
c5e8d7af
PH
30 unescapeHTML,
31 unified_strdate,
04cc9617 32 orderedSet,
edf3e38e 33 write_json_file,
81c2f20b 34 uppercase_escape,
c5e8d7af
PH
35)
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors.

    ``_real_initialize`` chains the helpers below: it sets the interface
    language, logs in when credentials are available and finally confirms
    the user's age.
    """
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Request ``_LANG_URL`` (non-fatally) and return True if it succeeded."""
        return bool(self._download_webpage(
            self._LANG_URL, None,
            note=u'Setting language', errnote='unable to set language',
            fatal=False))

    def _login(self):
        """Log in with the credentials from ``_get_login_info()``.

        Returns True when the login request went through and the response
        does not contain the login form again; returns False when no
        username is configured, when a page could not be downloaded, or
        when the login form is shown again.

        Raises ExtractorError when no username is available and
        ``_LOGIN_REQUIRED`` is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # Every other failure path returns False; do the same here
            # instead of falling back to an implicit None.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        # Being served the login form again is treated as rejected credentials.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _confirm_age(self):
        """POST the age confirmation form to ``_AGE_URL`` and return True."""
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        req = compat_urllib_request.Request(
            self._AGE_URL,
            compat_urllib_parse.urlencode(age_form).encode('ascii'))

        self._download_webpage(
            req, None,
            note=u'Confirming age', errnote=u'Unable to confirm age')
        return True

    def _real_initialize(self):
        """Set language, log in and confirm age; stop at the first step that fails."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
c5e8d7af 128
8377574c 129
de7f3446 130class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
0f818663 131 IE_DESC = u'YouTube.com'
cb7dfeea 132 _VALID_URL = r"""(?x)^
c5e8d7af 133 (
83aa5293 134 (?:https?://|//)? # http(s):// or protocol-independent URL (optional)
cb7dfeea 135 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 136 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 137 (?:www\.)?pwnyoutube\.com/|
f7000f3a 138 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
139 tube\.majestyc\.net/|
140 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
141 (?:.*?\#/)? # handle anchor (#/) redirect urls
142 (?: # the various things that can precede the ID:
143 (?:(?:v|embed|e)/) # v/ or embed/ or e/
144 |(?: # or the v= param in all its forms
f7000f3a 145 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af
PH
146 (?:\?|\#!?) # the params delimiter ? or # or #!
147 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
148 v=
149 )
f4b05232
JMF
150 ))
151 |youtu\.be/ # just youtu.be/xxxx
b9c76aa1 152 |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 153 )
c5e8d7af 154 )? # all until now is optional -> you can pass the naked ID
8963d9c2 155 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af
PH
156 (?(1).+)? # if we found the ID, everything can follow
157 $"""
c5e8d7af 158 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26
PH
159 _formats = {
160 '5': {'ext': 'flv', 'width': 400, 'height': 240},
161 '6': {'ext': 'flv', 'width': 450, 'height': 270},
162 '13': {'ext': '3gp'},
163 '17': {'ext': '3gp', 'width': 176, 'height': 144},
164 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
165 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
166 '34': {'ext': 'flv', 'width': 640, 'height': 360},
167 '35': {'ext': 'flv', 'width': 854, 'height': 480},
168 '36': {'ext': '3gp', 'width': 320, 'height': 240},
169 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
170 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
171 '43': {'ext': 'webm', 'width': 640, 'height': 360},
172 '44': {'ext': 'webm', 'width': 854, 'height': 480},
173 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
174 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
175
1d043b93 176
86fe61c8 177 # 3d videos
43b81eb9
PH
178 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
179 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
180 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
181 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
182 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
183 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
184 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
836a086c 185
96fb5605 186 # Apple HTTP Live Streaming
43b81eb9
PH
187 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
188 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
189 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
190 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
191 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
192 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
193 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
2c62dc26
PH
194
195 # DASH mp4 video
43b81eb9
PH
196 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
197 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
198 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
199 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
200 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
201 '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
202 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
203 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
836a086c 204
f6f1fc92 205 # Dash mp4 audio
2c62dc26
PH
206 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
207 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
208 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
836a086c
AZ
209
210 # Dash webm
e75cafe9
A
211 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
212 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
213 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
214 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
215 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
216 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
217 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
218 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
219 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
220 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
221 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
222 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
223 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
3c80377b 224 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
18061bba 225 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
2c62dc26
PH
226
227 # Dash webm audio
e75cafe9
A
228 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
229 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d
PH
230
231 # RTMP (unnamed)
232 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 233 }
836a086c 234
c5e8d7af 235 IE_NAME = u'youtube'
2eb88d95
PH
236 _TESTS = [
237 {
0e853ca4
PH
238 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
239 u"file": u"BaW_jenozKc.mp4",
240 u"info_dict": {
241 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
242 u"uploader": u"Philipp Hagemeister",
243 u"uploader_id": u"phihag",
244 u"upload_date": u"20121002",
ad3bc6ac
PH
245 u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
246 u"categories": [u'Science & Technology'],
2eb88d95 247 }
0e853ca4 248 },
0e853ca4
PH
249 {
250 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
251 u"file": u"UxxajLWwzqY.mp4",
252 u"note": u"Test generic use_cipher_signature video (#897)",
253 u"info_dict": {
254 u"upload_date": u"20120506",
255 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
ba60a3eb 256 u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
45ed795c 257 u"uploader": u"Icona Pop",
0e853ca4 258 u"uploader_id": u"IconaPop"
2eb88d95 259 }
c108eb73
JMF
260 },
261 {
262 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
263 u"file": u"07FYdnEawAQ.mp4",
264 u"note": u"Test VEVO video with age protection (#956)",
265 u"info_dict": {
266 u"upload_date": u"20130703",
267 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
268 u"description": u"md5:64249768eec3bc4276236606ea996373",
269 u"uploader": u"justintimberlakeVEVO",
270 u"uploader_id": u"justintimberlakeVEVO"
271 }
272 },
fccd3771 273 {
83aa5293 274 u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
fccd3771
PH
275 u"file": u"yZIXLfi8CZQ.mp4",
276 u"note": u"Embed-only video (#1746)",
277 u"info_dict": {
278 u"upload_date": u"20120608",
279 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
280 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
281 u"uploader": u"SET India",
282 u"uploader_id": u"setindia"
283 }
284 },
dd27fd17
PH
285 {
286 u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
287 u"file": u"a9LDPn-MO4I.m4a",
288 u"note": u"256k DASH audio (format 141) via DASH manifest",
dd27fd17
PH
289 u"info_dict": {
290 u"upload_date": "20121002",
291 u"uploader_id": "8KVIDEO",
292 u"description": "No description available.",
293 u"uploader": "8KVIDEO",
294 u"title": "UHDTV TEST 8K VIDEO.mp4"
4919603f
PH
295 },
296 u"params": {
297 u"youtube_include_dash_manifest": True,
298 u"format": "141",
299 },
dd27fd17 300 },
3489b7d2
JMF
301 # DASH manifest with encrypted signature
302 {
303 u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
304 u'info_dict': {
305 u'id': u'IB3lcPjvWLA',
306 u'ext': u'm4a',
307 u'title': u'Afrojack - The Spark ft. Spree Wilson',
e00c9cf5 308 u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
3489b7d2
JMF
309 u'uploader': u'AfrojackVEVO',
310 u'uploader_id': u'AfrojackVEVO',
311 u'upload_date': u'20131011',
312 },
313 u"params": {
314 u'youtube_include_dash_manifest': True,
315 u'format': '141',
316 },
317 },
2eb88d95
PH
318 ]
319
c5e8d7af
PH
320
@classmethod
def suitable(cls, url):
    """Return True if this IE should handle *url*.

    URLs accepted by YoutubePlaylistIE are rejected here even when they
    also match ``_VALID_URL``.
    """
    if YoutubePlaylistIE.suitable(url):
        return False
    matched = re.match(cls._VALID_URL, url)
    return matched is not None
c5e8d7af 326
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature, keyed by (player_url, signature length).
    self._player_cache = dict()
e0df6211 330
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
334
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
338
def report_unavailable_format(self, video_id, format):
    """Tell the user that the requested format is not available for the video."""
    message = u'%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
342
def report_rtmp_download(self):
    """Tell the user that the download will go through RTMP."""
    message = u'RTMP download detected'
    self.to_screen(message)
346
def _extract_signature_function(self, video_id, player_url, slen):
    """Return a callable that deciphers signatures of length *slen*.

    The player id and type (``js`` or ``swf``) are taken from
    *player_url*.  When a cache directory is configured, a previously
    stored index list is loaded from
    ``<cachedir>/youtube-sigfuncs/<type>_<id>_<slen>.json`` and turned
    into a function without downloading anything.  Otherwise the player
    is downloaded and parsed, and the resulting function is written to
    the cache as the list of source indices it picks.

    Raises ExtractorError if the player URL cannot be parsed or the
    player type is neither ``js`` nor ``swf``.
    """
    id_m = re.match(
        r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%d' % (player_type, player_id, slen)
    # Invariant: both regex groups exclude path separators, so func_id is
    # always a plain file name.
    assert os.path.basename(func_id) == func_id
    cache_dir = get_cachedir(self._downloader.params)

    cache_enabled = cache_dir is not None
    if cache_enabled:
        cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                u'youtube-sigfuncs',
                                func_id + '.json')
        try:
            with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                cache_spec = json.load(cachef)
            return lambda s: u''.join(s[i] for i in cache_spec)
        except IOError:
            pass  # No cache available
        except ValueError:
            # Unreadable/corrupt cache file: do not abort the extraction,
            # regenerate the function (the file is rewritten below).
            self._downloader.report_warning(
                u'Ignoring invalid signature cache file %r' % cache_fn)

    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # An assert would vanish under ``python -O`` and leave ``res``
        # unbound, so raise explicitly.
        raise ExtractorError('Invalid player type %r' % player_type)

    if cache_enabled:
        try:
            # Feed the function a string whose characters are their own
            # positions; the code points of the output are then the source
            # indices the function selects.
            test_string = u''.join(map(compat_chr, range(slen)))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]
            try:
                os.makedirs(os.path.dirname(cache_fn))
            except OSError as ose:
                if ose.errno != errno.EEXIST:
                    raise
            write_json_file(cache_spec, cache_fn)
        except Exception:
            # Caching is best effort: warn but still return the function.
            tb = traceback.format_exc()
            self._downloader.report_warning(
                u'Writing cache to %r failed: %s' % (cache_fn, tb))

    return res
406
edf3e38e
PH
def _print_sig_code(self, func, slen):
    """Print Python code equivalent to *func* for signatures of length *slen*.

    *func* is run on a test string whose characters are their own
    positions; the picked indices are then compressed into indexing and
    slicing expressions on ``s`` and shown via ``to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = u'' if start == 0 else str(start)
            ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
            steps = u'' if step == 1 else (u':%d' % step)
            return u's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs
        # and would leave ``i`` unbound; emit the plain lookups directly.
        if len(idxs) < 2:
            for idx in idxs:
                yield u's[%d]' % idx
            return

        step = None
        start = '(Never used)'  # Squelch pyflakes warnings - start will be
                                # set as soon as step is set
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at ``prev``; ``i`` is looked at again as
                # ``prev`` of the next pair.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield u's[%d]' % prev
        if step is None:
            yield u's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = u''.join(map(compat_chr, range(slen)))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = u' + '.join(gen_sig_code(cache_spec))
    code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature function:\n' + code)
edf3e38e 442
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player source.

    The function name is the identifier following ``signature=`` in
    *jscode*; it is extracted with JSInterpreter and wrapped so that it
    takes the signature string as its only argument.
    """
    funcname = self._search_regex(
        r'signature=([$a-zA-Z]+)', jscode,
        u'Initial JS player signature function name')
    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def decipher(s):
        return initial_function([s])

    return decipher
451
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the SWF player contents.

    Uses the ``decipher`` function of the ``SignatureDecipher`` class
    found by SWFInterpreter, wrapped so that it takes the signature
    string as its only argument.
    """
    TARGET_CLASSNAME = u'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    searched_class = interpreter.extract_class(TARGET_CLASSNAME)
    initial_function = interpreter.extract_function(searched_class, u'decipher')

    def decipher(s):
        return initial_function([s])

    return decipher
458
83799698 459 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 460 """Turn the encrypted s field into a working signature"""
6b37f0be 461
c8bf86d5
PH
462 if player_url is None:
463 raise ExtractorError(u'Cannot decrypt signature without player_url')
920de7a2 464
c8bf86d5
PH
465 if player_url.startswith(u'//'):
466 player_url = u'https:' + player_url
467 try:
468 player_id = (player_url, len(s))
469 if player_id not in self._player_cache:
470 func = self._extract_signature_function(
471 video_id, player_url, len(s)
472 )
473 self._player_cache[player_id] = func
474 func = self._player_cache[player_id]
475 if self._downloader.params.get('youtube_print_sig_code'):
476 self._print_sig_code(func, len(s))
477 return func(s)
478 except Exception as e:
479 tb = traceback.format_exc()
480 raise ExtractorError(
481 u'Automatic signature extraction failed: ' + tb, cause=e)
e0df6211 482
def _get_available_subtitles(self, video_id, webpage):
    """Return a dict mapping language code to subtitle URL.

    The track list is downloaded from the timedtext listing endpoint;
    an empty dict is returned (with a warning) when the download fails
    or no track is listed.  *webpage* is not used here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        sub_list = self._download_webpage(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        return {}

    sub_lang_list = {}
    tracks = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
    for track_name, lang in tracks:
        query = compat_urllib_parse.urlencode({
            'lang': lang,
            'v': video_id,
            'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
            'name': unescapeHTML(track_name).encode('utf-8'),
        })
        # A later track with the same language code replaces an earlier one.
        sub_lang_list[lang] = u'https://www.youtube.com/api/timedtext?' + query

    if sub_lang_list:
        return sub_lang_list
    self._downloader.report_warning(u'video doesn\'t have subtitles')
    return {}
508
055e6f36 509 def _get_available_automatic_caption(self, video_id, webpage):
de7f3446
JMF
510 """We need the webpage for getting the captions url, pass it as an
511 argument to speed up the process."""
ca715127 512 sub_format = self._downloader.params.get('subtitlesformat', 'srt')
de7f3446
JMF
513 self.to_screen(u'%s: Looking for automatic captions' % video_id)
514 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
055e6f36 515 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
de7f3446
JMF
516 if mobj is None:
517 self._downloader.report_warning(err_msg)
518 return {}
519 player_config = json.loads(mobj.group(1))
520 try:
521 args = player_config[u'args']
522 caption_url = args[u'ttsurl']
523 timestamp = args[u'timestamp']
055e6f36
JMF
524 # We get the available subtitles
525 list_params = compat_urllib_parse.urlencode({
526 'type': 'list',
527 'tlangs': 1,
528 'asrs': 1,
de7f3446 529 })
055e6f36 530 list_url = caption_url + '&' + list_params
e26f8712 531 caption_list = self._download_xml(list_url, video_id)
e3dc22ca 532 original_lang_node = caption_list.find('track')
f6a54188 533 if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
e3dc22ca
JMF
534 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
535 return {}
536 original_lang = original_lang_node.attrib['lang_code']
055e6f36
JMF
537
538 sub_lang_list = {}
539 for lang_node in caption_list.findall('target'):
540 sub_lang = lang_node.attrib['lang_code']
541 params = compat_urllib_parse.urlencode({
542 'lang': original_lang,
543 'tlang': sub_lang,
544 'fmt': sub_format,
545 'ts': timestamp,
546 'kind': 'asr',
547 })
548 sub_lang_list[sub_lang] = caption_url + '&' + params
549 return sub_lang_list
de7f3446
JMF
550 # An extractor error can be raise by the download process if there are
551 # no automatic captions but there are subtitles
552 except (KeyError, ExtractorError):
553 self._downloader.report_warning(err_msg)
554 return {}
555
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video ID, i.e. the second group of ``_VALID_URL`` matched on *url*.

    Raises ExtractorError when *url* does not match ``_VALID_URL``.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError(u'Invalid URL: %s' % url)
563
1d043b93
JMF
def _extract_from_m3u8(self, manifest_url, video_id):
    """Download the manifest and return a dict mapping itag to format URL.

    Every non-empty manifest line that does not start with ``#`` is
    treated as a format URL; its itag is read from the ``itag/<digits>/``
    part of that URL.
    """
    manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
    url_map = {}
    for line in manifest.split('\n'):
        if not line or line.startswith('#'):
            continue
        itag = self._search_regex(r'itag/(\d+?)/', line, 'itag')
        url_map[itag] = line
    return url_map
577
1fb07d10
JG
def _extract_annotations(self, video_id):
    """Download and return the annotations document for *video_id*."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note=u'Searching for annotations.',
        errnote=u'Unable to download video annotations.')
581
c5e8d7af 582 def _real_extract(self, url):
7e8c0af0
PH
583 proto = (
584 u'http' if self._downloader.params.get('prefer_insecure', False)
585 else u'https')
586
c5e8d7af
PH
587 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
588 mobj = re.search(self._NEXT_URL_RE, url)
589 if mobj:
7e8c0af0 590 url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
97665381 591 video_id = self.extract_id(url)
c5e8d7af
PH
592
593 # Get video webpage
7e8c0af0 594 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
336c3a69 595 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
596
597 # Attempt to extract SWF player URL
e0df6211 598 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
599 if mobj is not None:
600 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
601 else:
602 player_url = None
603
604 # Get video info
605 self.report_video_info_webpage_download(video_id)
c108eb73
JMF
606 if re.search(r'player-age-gate-content">', video_webpage) is not None:
607 self.report_age_confirmation()
608 age_gate = True
609 # We simulate the access to the video from www.youtube.com/v/{video_id}
610 # this can be viewed without login into Youtube
2c57c7fa
JMF
611 data = compat_urllib_parse.urlencode({
612 'video_id': video_id,
613 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934
JMF
614 'sts': self._search_regex(
615 r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
2c57c7fa 616 })
7e8c0af0 617 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
c5e8d7af
PH
618 video_info_webpage = self._download_webpage(video_info_url, video_id,
619 note=False,
620 errnote='unable to download video info webpage')
621 video_info = compat_parse_qs(video_info_webpage)
c108eb73
JMF
622 else:
623 age_gate = False
624 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
7e8c0af0 625 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
c108eb73
JMF
626 % (video_id, el_type))
627 video_info_webpage = self._download_webpage(video_info_url, video_id,
628 note=False,
629 errnote='unable to download video info webpage')
630 video_info = compat_parse_qs(video_info_webpage)
631 if 'token' in video_info:
632 break
c5e8d7af
PH
633 if 'token' not in video_info:
634 if 'reason' in video_info:
d11271dd
PH
635 raise ExtractorError(
636 u'YouTube said: %s' % video_info['reason'][0],
637 expected=True, video_id=video_id)
c5e8d7af 638 else:
d11271dd
PH
639 raise ExtractorError(
640 u'"token" parameter not in video info for unknown reason',
641 video_id=video_id)
c5e8d7af 642
1d699755
PH
643 if 'view_count' in video_info:
644 view_count = int(video_info['view_count'][0])
645 else:
646 view_count = None
647
c5e8d7af
PH
648 # Check for "rental" videos
649 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
650 raise ExtractorError(u'"rental" videos not supported')
651
652 # Start extracting information
653 self.report_information_extraction(video_id)
654
655 # uploader
656 if 'author' not in video_info:
657 raise ExtractorError(u'Unable to extract uploader name')
658 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
659
660 # uploader_id
661 video_uploader_id = None
662 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
663 if mobj is not None:
664 video_uploader_id = mobj.group(1)
665 else:
666 self._downloader.report_warning(u'unable to extract uploader nickname')
667
668 # title
a8c6b241 669 if 'title' in video_info:
aa92f063 670 video_title = video_info['title'][0]
a8c6b241
PH
671 else:
672 self._downloader.report_warning(u'Unable to extract video title')
673 video_title = u'_'
c5e8d7af
PH
674
675 # thumbnail image
7763b04e
JMF
676 # We try first to get a high quality image:
677 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
678 video_webpage, re.DOTALL)
679 if m_thumb is not None:
680 video_thumbnail = m_thumb.group(1)
681 elif 'thumbnail_url' not in video_info:
c5e8d7af 682 self._downloader.report_warning(u'unable to extract video thumbnail')
f490e77e 683 video_thumbnail = None
c5e8d7af
PH
684 else: # don't panic if we can't find it
685 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
686
687 # upload date
688 upload_date = None
ad3bc6ac 689 mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
beee53de
PH
690 if mobj is None:
691 mobj = re.search(
263bd4ec 692 r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
beee53de 693 video_webpage)
c5e8d7af
PH
694 if mobj is not None:
695 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
696 upload_date = unified_strdate(upload_date)
697
ec8deefc
DG
698 m_cat_container = get_element_by_id("eow-category", video_webpage)
699 if m_cat_container:
ad3bc6ac 700 category = self._html_search_regex(
01ed5c9b 701 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
702 default=None)
703 video_categories = None if category is None else [category]
704 else:
705 video_categories = None
ec8deefc 706
c5e8d7af
PH
707 # description
708 video_description = get_element_by_id("eow-description", video_webpage)
709 if video_description:
27dcce19
PH
710 video_description = re.sub(r'''(?x)
711 <a\s+
712 (?:[a-zA-Z-]+="[^"]+"\s+)*?
713 title="([^"]+)"\s+
714 (?:[a-zA-Z-]+="[^"]+"\s+)*?
715 class="yt-uix-redirect-link"\s*>
716 [^<]+
717 </a>
718 ''', r'\1', video_description)
c5e8d7af
PH
719 video_description = clean_html(video_description)
720 else:
721 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
722 if fd_mobj:
723 video_description = unescapeHTML(fd_mobj.group(1))
724 else:
725 video_description = u''
726
336c3a69 727 def _extract_count(klass):
46374a56
PH
728 count = self._search_regex(
729 r'class="%s">([\d,]+)</span>' % re.escape(klass),
730 video_webpage, klass, default=None)
336c3a69
JMF
731 if count is not None:
732 return int(count.replace(',', ''))
733 return None
734 like_count = _extract_count(u'likes-count')
735 dislike_count = _extract_count(u'dislikes-count')
736
c5e8d7af 737 # subtitles
d82134c3 738 video_subtitles = self.extract_subtitles(video_id, video_webpage)
c5e8d7af 739
c5e8d7af 740 if self._downloader.params.get('listsubtitles', False):
d665f8d3 741 self._list_available_subtitles(video_id, video_webpage)
c5e8d7af
PH
742 return
743
744 if 'length_seconds' not in video_info:
745 self._downloader.report_warning(u'unable to extract video duration')
b466b702 746 video_duration = None
c5e8d7af 747 else:
b466b702 748 video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
c5e8d7af 749
1fb07d10
JG
750 # annotations
751 video_annotations = None
752 if self._downloader.params.get('writeannotations', False):
753 video_annotations = self._extract_annotations(video_id)
754
c5e8d7af 755 # Decide which formats to download
c5e8d7af 756 try:
ae7ed920 757 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
50be92c1
PH
758 if not mobj:
759 raise ValueError('Could not find vevo ID')
ae7ed920
PH
760 json_code = uppercase_escape(mobj.group(1))
761 ytplayer_config = json.loads(json_code)
3489b7d2 762 args = ytplayer_config['args']
7ce7e394
JMF
763 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
764 # this signatures are encrypted
44d46655 765 if 'url_encoded_fmt_stream_map' not in args:
f10503db 766 raise ValueError(u'No stream_map present') # caught below
00fe14fc
JMF
767 re_signature = re.compile(r'[&,]s=')
768 m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
7ce7e394
JMF
769 if m_s is not None:
770 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
c5e8d7af 771 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
00fe14fc 772 m_s = re_signature.search(args.get('adaptive_fmts', u''))
b7a68384 773 if m_s is not None:
00fe14fc
JMF
774 if 'adaptive_fmts' in video_info:
775 video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
37b6d5f6 776 else:
00fe14fc 777 video_info['adaptive_fmts'] = [args['adaptive_fmts']]
c5e8d7af
PH
778 except ValueError:
779 pass
780
dd27fd17
PH
781 def _map_to_format_list(urlmap):
782 formats = []
783 for itag, video_real_url in urlmap.items():
784 dct = {
785 'format_id': itag,
786 'url': video_real_url,
787 'player_url': player_url,
788 }
0b65e5d4
PH
789 if itag in self._formats:
790 dct.update(self._formats[itag])
dd27fd17
PH
791 formats.append(dct)
792 return formats
793
c5e8d7af
PH
794 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
795 self.report_rtmp_download()
dd27fd17
PH
796 formats = [{
797 'format_id': '_rtmp',
798 'protocol': 'rtmp',
799 'url': video_info['conn'][0],
800 'player_url': player_url,
801 }]
00fe14fc
JMF
802 elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
803 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
804 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 805 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
c5e8d7af 806 url_map = {}
00fe14fc 807 for url_data_str in encoded_url_map.split(','):
c5e8d7af
PH
808 url_data = compat_parse_qs(url_data_str)
809 if 'itag' in url_data and 'url' in url_data:
810 url = url_data['url'][0]
811 if 'sig' in url_data:
812 url += '&signature=' + url_data['sig'][0]
813 elif 's' in url_data:
e0df6211 814 encrypted_sig = url_data['s'][0]
cf010131
PH
815
816 if not age_gate:
817 jsplayer_url_json = self._search_regex(
818 r'"assets":.+?"js":\s*("[^"]+")',
819 video_webpage, u'JS player URL')
820 player_url = json.loads(jsplayer_url_json)
821 if player_url is None:
822 player_url_json = self._search_regex(
823 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
824 video_webpage, u'age gate player URL')
825 player_url = json.loads(player_url_json)
826
769fda3c 827 if self._downloader.params.get('verbose'):
cf010131
PH
828 if player_url is None:
829 player_version = 'unknown'
830 player_desc = 'unknown'
831 else:
832 if player_url.endswith('swf'):
bdde940e 833 player_version = self._search_regex(
b8c74d60 834 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
bdde940e 835 u'flash player', fatal=False)
cf010131
PH
836 player_desc = 'flash player %s' % player_version
837 else:
838 player_version = self._search_regex(
b081cebe
PH
839 r'html5player-([^/]+?)(?:/html5player)?\.js',
840 player_url,
cf010131
PH
841 'html5 player', fatal=False)
842 player_desc = u'html5 player %s' % player_version
e0df6211
PH
843
844 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
5a76c651 845 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
e0df6211
PH
846 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
847
83799698
PH
848 signature = self._decrypt_signature(
849 encrypted_sig, video_id, player_url, age_gate)
c5e8d7af
PH
850 url += '&signature=' + signature
851 if 'ratebypass' not in url:
852 url += '&ratebypass=yes'
853 url_map[url_data['itag'][0]] = url
dd27fd17 854 formats = _map_to_format_list(url_map)
1d043b93
JMF
855 elif video_info.get('hlsvp'):
856 manifest_url = video_info['hlsvp'][0]
857 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 858 formats = _map_to_format_list(url_map)
c5e8d7af 859 else:
9abb3204 860 raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 861
dd27fd17 862 # Look for the DASH manifest
d68f0cdb 863 if (self._downloader.params.get('youtube_include_dash_manifest', False)):
dd27fd17 864 try:
d68f0cdb 865 # The DASH manifest used needs to be the one from the original video_webpage.
866 # The one found in get_video_info seems to be using different signatures.
867 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
868 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
869 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
870 if age_gate:
3489b7d2 871 dash_manifest_url = video_info.get('dashmpd')[0]
d68f0cdb 872 else:
3489b7d2 873 dash_manifest_url = ytplayer_config['args']['dashmpd']
d68f0cdb 874 def decrypt_sig(mobj):
875 s = mobj.group(1)
876 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
877 return '/signature/%s' % dec_s
878 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
dd27fd17 879 dash_doc = self._download_xml(
d68f0cdb 880 dash_manifest_url, video_id,
dd27fd17
PH
881 note=u'Downloading DASH manifest',
882 errnote=u'Could not download DASH manifest')
883 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
884 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
885 if url_el is None:
886 continue
887 format_id = r.attrib['id']
888 video_url = url_el.text
889 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
890 f = {
891 'format_id': format_id,
892 'url': video_url,
893 'width': int_or_none(r.attrib.get('width')),
894 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
895 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
896 'filesize': filesize,
897 }
898 try:
899 existing_format = next(
900 fo for fo in formats
901 if fo['format_id'] == format_id)
902 except StopIteration:
903 f.update(self._formats.get(format_id, {}))
904 formats.append(f)
905 else:
906 existing_format.update(f)
907
908 except (ExtractorError, KeyError) as e:
909 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
d80044c2 910
4bcc7bd1 911 self._sort_formats(formats)
4ea3be0a 912
913 return {
914 'id': video_id,
915 'uploader': video_uploader,
916 'uploader_id': video_uploader_id,
917 'upload_date': upload_date,
918 'title': video_title,
919 'thumbnail': video_thumbnail,
920 'description': video_description,
ec8deefc 921 'categories': video_categories,
4ea3be0a 922 'subtitles': video_subtitles,
923 'duration': video_duration,
924 'age_limit': 18 if age_gate else 0,
925 'annotations': video_annotations,
7e8c0af0 926 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
4ea3be0a 927 'view_count': view_count,
928 'like_count': like_count,
929 'dislike_count': dislike_count,
930 'formats': formats,
931 }
c5e8d7af 932
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube playlists, including auto-generated mixes.

    Matches playlist URLs (also watch URLs carrying a p/a/list parameter)
    as well as bare playlist ids that start with a known prefix.
    """
    IE_NAME = u'youtube:playlist'
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'

    def _real_initialize(self):
        """Run the login step before any extraction happens."""
        self._login()

    def _ids_to_results(self, ids):
        """Turn video ids into url results handled by the 'Youtube' extractor."""
        results = []
        for video_id in ids:
            results.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return results

    def _extract_mix(self, playlist_id):
        """Extract a mix playlist from the watch page of its seed video.

        A mix is generated from a single video; the playlist id is just
        'RD' + video_id, so the last 11 characters are used as the video id.
        """
        mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(mix_url, playlist_id, u'Downloading Youtube mix')

        # Try the candidate title elements in order; the first non-empty
        # one wins (otherwise the result of the last lookup is kept).
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break
        title = clean_html(title_span)

        video_re = r'''(?x)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        mix_ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))

        return self.playlist_result(
            self._ids_to_results(mix_ids), playlist_id, title)

    def _real_extract(self, url):
        """Return the playlist result (or a single video with --no-playlist).

        Raises ExtractorError for non-matching URLs, for top lists ('TL'
        ids) and when the playlist page reports a missing/private playlist.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        # A watch URL names one video in addition to the list it belongs to
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        playlist_url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(playlist_url, playlist_id)

        # Bail out early when the page says the playlist is missing or private
        unavailable = re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page)
        if unavailable is not None:
            raise ExtractorError(
                u'The playlist doesn\'t exist or is private, use --username or '
                '--netrc to access it.',
                expected=True)

        # The first page serves both as video listing and as the source of
        # the "load more" link; later pages deliver them as separate fields.
        content_html = page
        more_widget_html = page
        ids = []

        for page_num in itertools.count(1):
            # Drop duplicates and the link with index 0
            # (it's not the first video of the playlist)
            ids.extend(orderedSet(
                m.group('id')
                for m in re.finditer(self._VIDEO_RE, content_html)
                if m.group('index') != '0'))

            more_match = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not more_match:
                break

            more = self._download_json(
                'https://youtube.com/%s' % more_match.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, u'title')

        return self.playlist_result(
            self._ids_to_results(ids), playlist_id, playlist_title)
c5e8d7af
PH
1043
1044
0a688bc0
JMF
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extractor for top lists addressed as "yttoplist:{channel}:{list title}"."""
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
               u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
    # Upper bound on re-downloads of the list page when it comes back
    # without any videos, so extraction cannot loop forever.
    _MAX_RETRIES = 10

    def _real_extract(self, url):
        """Locate the top list on the channel page and return it as a playlist.

        The channel page is searched for a link containing the url-encoded
        list title; that page is then downloaded and scanned for video ids.

        Raises ExtractorError if the list page still contains no videos
        after _MAX_RETRIES retries.
        """
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')
        query = compat_urllib_parse.urlencode({'title': title})
        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(playlist_re, channel_page, u'list')
        list_url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # Sometimes the webpage doesn't contain the videos: retry, but only
        # a bounded number of times instead of hammering the server forever.
        for attempt in range(self._MAX_RETRIES + 1):
            msg = u'Downloading Youtube mix'
            if attempt > 0:
                msg += ', retry #%d' % attempt
            webpage = self._download_webpage(list_url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        else:
            raise ExtractorError(
                u'Unable to find any videos in top list "%s" after %d retries'
                % (title, self._MAX_RETRIES))

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_title=title)
1075
1076
class YoutubeChannelIE(InfoExtractor):
    """Extractor that lists the videos of a YouTube channel as a playlist."""
    IE_NAME = u'youtube:channel'
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'

    def extract_videos_from_page(self, page):
        """Return the distinct video ids linked from page, in first-seen order."""
        seen = set()
        ordered_ids = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id in seen:
                continue
            seen.add(video_id)
            ordered_ids.append(video_id)
        return ordered_ids

    def _real_extract(self, url):
        """Collect the channel's video ids and return them as a playlist.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = url_match.group(1)

        # Download channel page
        videos_url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(videos_url, channel_id)
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            video_ids = []
            for pagenum in itertools.count(1):
                page = self._download_json(
                    self._MORE_PAGES_URL % (pagenum, channel_id),
                    channel_id, note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                video_ids.extend(
                    self.extract_videos_from_page(page['content_html']))

                # Stop once the widget no longer offers a "load more" control
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = []
        for video_id in video_ids:
            url_entries.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(url_entries, channel_id)
c5e8d7af
PH
1131
1132
class YoutubeUserIE(InfoExtractor):
    """Extractor for a user's uploads, listed through the GData uploads feed."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        """Return True only if no other *IE class in this module accepts url."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match it as well.
        other_ies = (
            klass for (name, klass) in globals().items()
            if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the user's uploads, titled with the username.

        Raises ExtractorError when url does not match _VALID_URL or when an
        API response is not valid JSON.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            """Yield one url entry per video listed on feed page pagenum."""
            page_size = self._GDATA_PAGE_SIZE
            # The API's start-index is 1-based
            start_index = pagenum * page_size + 1

            gdata_url = self._GDATA_URL % (username, page_size, start_index)
            # The last index of the page is inclusive, hence the "- 1"
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + page_size - 1))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # A feed without entries yields nothing for this page
                return

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # The video id is the last path component of the entry id
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': entry['title']['$t'],
                }

        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1193
b05654f0
PH
1194
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the GData videos feed ("ytsearch" key)."""
    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    _MAX_RESULTS = 1000
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

    def _get_n_results(self, query, n):
        """Fetch up to n results for query and return them as a playlist.

        Raises ExtractorError when an API response carries no 'items'.
        """
        page_size = 50
        collected_ids = []
        pagenum = 0
        limit = n

        while page_size * pagenum < limit:
            start_index = page_size * pagenum + 1
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                start_index)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
                errnote=u'Unable to download API page')
            api_response = json.loads(data_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            collected_ids.extend(
                [video['id'] for video in api_response['items']])

            # Never ask for more than the API reports as available
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot the requested amount
        videos = []
        for video_id in collected_ids[:n]:
            videos.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(videos, query)
75dff0ee 1236
c9ae7b95 1237
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant whose API query adds orderby=published ("ytsearchdate" key)."""
    IE_DESC = u'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
75dff0ee 1243
c9ae7b95
PH
1244
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for search result pages (youtube.com/results?search_query=...)."""
    IE_NAME = u'youtube:search_url'
    IE_DESC = u'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _entry_from_snippet(self, part_code):
        """Build a url entry from the HTML found inside one result heading."""
        # The title is optional (fatal=False); the link is required
        part_title = self._html_search_regex(
            [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
        part_url_snippet = self._html_search_regex(
            r'(?s)href="([^"]+)"', part_code, 'item URL')
        return {
            '_type': 'url',
            'url': compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet),
            'title': part_title,
        }

    def _real_extract(self, url):
        """Return the results listed on the search page as a playlist dict."""
        query = compat_urllib_parse.unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')

        # Every result is introduced by its own title heading
        entries = []
        for part_code in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            entries.append(self._entry_from_snippet(part_code))

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1279
1280
class YoutubeShowIE(InfoExtractor):
    """Extractor for show pages; every season is handed on as a playlist URL."""
    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Return a list with one 'YoutubePlaylist' url result per season link."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')

        # There's one playlist for each season of the show
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))

        results = []
        for season_path in season_paths:
            results.append(self.url_result(
                'https://www.youtube.com' + season_path, 'YoutubePlaylist'))
        return results
04cc9617
JMF
1294
1295
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common base for the extractors that read their videos from
    http://www.youtube.com/feed_ajax
    A subclass has to provide the _FEED_NAME and _PLAYLIST_TITLE attributes.
    """
    _LOGIN_REQUIRED = True
    # When True, action_load_personal_feed is requested instead of
    # action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template of the feed; the paging value is filled in later."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run the login step before any extraction happens."""
        self._login()

    def _real_extract(self, url):
        """Walk through the feed pages and return their videos as a playlist."""
        entries = []
        paging = 0
        for page_num in itertools.count(1):
            info = self._download_json(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_num)
            # The markup is read from 'feed_html', falling back to 'content_html'
            feed_html = info.get('feed_html') or info.get('content_html')

            video_ids = orderedSet(
                re.findall(r'"/watch\?v=(.*?)["&]', feed_html))
            for video_id in video_ids:
                entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))

            # The "load more" link carries the paging value of the next page
            next_page = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                feed_html)
            if next_page is None:
                break
            paging = next_page.group('paging')
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)
1340
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1346
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
c626a3d9 1352
43ba5456
JMF
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the personal 'watch_later' feed."""
    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
c626a3d9 1359
f459d170
JMF
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the personal 'history' feed."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string, like the sibling feed extractors: in a plain literal the
    # "\." sequences are invalid escapes that newer Python versions warn about.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1366
c626a3d9
JMF
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that hands the favourites playlist on to 'YoutubePlaylist'."""
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'

    def _real_extract(self, url):
        """Find the playlist id on the my_favorites page and delegate to it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
1377
1378
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that carry no video id and rejects them.

    Such URLs typically result from an unquoted '&' on the command line, so
    extraction always fails with a hint on how to quote the URL.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+
        )?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError explaining the quoting issue."""
        hint = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like youtube-dl '
            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            u' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)