]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[jsinterp] Allow digits in function names
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
edf3e38e 3import errno
e0df6211 4import io
0ca96d48 5import itertools
c5e8d7af 6import json
c4417ddb 7import os.path
c5e8d7af 8import re
e0df6211 9import traceback
c5e8d7af 10
b05654f0 11from .common import InfoExtractor, SearchInfoExtractor
54d39d8b 12from .subtitles import SubtitlesInfoExtractor
2b25cb5d 13from ..jsinterp import JSInterpreter
54256267 14from ..swfinterp import SWFInterpreter
c5e8d7af 15from ..utils import (
edf3e38e 16 compat_chr,
c5e8d7af 17 compat_parse_qs,
c5e8d7af
PH
18 compat_urllib_parse,
19 compat_urllib_request,
7c61bd36 20 compat_urlparse,
c5e8d7af
PH
21 compat_str,
22
23 clean_html,
c38b1e77 24 get_cachedir,
c5e8d7af 25 get_element_by_id,
652cdaa2 26 get_element_by_attribute,
c5e8d7af 27 ExtractorError,
dd27fd17 28 int_or_none,
b7ab0590 29 PagedList,
c5e8d7af
PH
30 unescapeHTML,
31 unified_strdate,
04cc9617 32 orderedSet,
edf3e38e 33 write_json_file,
81c2f20b 34 uppercase_escape,
c5e8d7af
PH
35)
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Request _LANG_URL and return whether the download produced a
        truthy result (the download is non-fatal)."""
        return bool(self._download_webpage(
            self._LANG_URL, None,
            note=u'Setting language', errnote='unable to set language',
            fatal=False))

    def _login(self):
        """Log in with the configured credentials.

        Returns True on success and False in every other case: no
        credentials configured, login page or login request could not be
        downloaded, or the login form is shown again after submitting.
        Raises ExtractorError when no credentials are available and
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # Return an explicit False like every other failure path of this
            # method, rather than an implicit None.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        # Being served the login form again means the credentials were rejected
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _confirm_age(self):
        """POST the age confirmation form to _AGE_URL and return True."""
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        req = compat_urllib_request.Request(
            self._AGE_URL,
            compat_urllib_parse.urlencode(age_form).encode('ascii'))

        self._download_webpage(
            req, None,
            note=u'Confirming age', errnote=u'Unable to confirm age')
        return True

    def _real_initialize(self):
        """Set the language, log in and confirm age, stopping at the first
        step that does not succeed.  Does nothing without a downloader."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
c5e8d7af 128
8377574c 129
de7f3446 130class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
0f818663 131 IE_DESC = u'YouTube.com'
cb7dfeea 132 _VALID_URL = r"""(?x)^
c5e8d7af 133 (
83aa5293 134 (?:https?://|//)? # http(s):// or protocol-independent URL (optional)
cb7dfeea 135 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 136 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 137 (?:www\.)?pwnyoutube\.com/|
f7000f3a 138 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
139 tube\.majestyc\.net/|
140 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
141 (?:.*?\#/)? # handle anchor (#/) redirect urls
142 (?: # the various things that can precede the ID:
143 (?:(?:v|embed|e)/) # v/ or embed/ or e/
144 |(?: # or the v= param in all its forms
f7000f3a 145 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af
PH
146 (?:\?|\#!?) # the params delimiter ? or # or #!
147 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
148 v=
149 )
f4b05232
JMF
150 ))
151 |youtu\.be/ # just youtu.be/xxxx
b9c76aa1 152 |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 153 )
c5e8d7af 154 )? # all until now is optional -> you can pass the naked ID
8963d9c2 155 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af
PH
156 (?(1).+)? # if we found the ID, everything can follow
157 $"""
c5e8d7af 158 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26
PH
159 _formats = {
160 '5': {'ext': 'flv', 'width': 400, 'height': 240},
161 '6': {'ext': 'flv', 'width': 450, 'height': 270},
162 '13': {'ext': '3gp'},
163 '17': {'ext': '3gp', 'width': 176, 'height': 144},
164 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
165 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
166 '34': {'ext': 'flv', 'width': 640, 'height': 360},
167 '35': {'ext': 'flv', 'width': 854, 'height': 480},
168 '36': {'ext': '3gp', 'width': 320, 'height': 240},
169 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
170 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
171 '43': {'ext': 'webm', 'width': 640, 'height': 360},
172 '44': {'ext': 'webm', 'width': 854, 'height': 480},
173 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
174 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
175
1d043b93 176
86fe61c8 177 # 3d videos
43b81eb9
PH
178 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
179 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
180 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
181 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
182 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
183 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
184 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
836a086c 185
96fb5605 186 # Apple HTTP Live Streaming
43b81eb9
PH
187 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
188 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
189 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
190 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
191 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
192 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
193 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
2c62dc26
PH
194
195 # DASH mp4 video
43b81eb9
PH
196 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
197 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
198 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
199 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
200 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
201 '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
202 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
203 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
836a086c 204
f6f1fc92 205 # Dash mp4 audio
2c62dc26
PH
206 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
207 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
208 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
836a086c
AZ
209
210 # Dash webm
e75cafe9
A
211 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
212 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
213 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
214 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
215 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
216 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
217 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
218 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
219 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
220 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
221 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
222 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
223 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
3c80377b 224 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
18061bba 225 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
2c62dc26
PH
226
227 # Dash webm audio
e75cafe9
A
228 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
229 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d
PH
230
231 # RTMP (unnamed)
232 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 233 }
836a086c 234
c5e8d7af 235 IE_NAME = u'youtube'
2eb88d95
PH
236 _TESTS = [
237 {
0e853ca4
PH
238 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
239 u"file": u"BaW_jenozKc.mp4",
240 u"info_dict": {
241 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
242 u"uploader": u"Philipp Hagemeister",
243 u"uploader_id": u"phihag",
244 u"upload_date": u"20121002",
ad3bc6ac
PH
245 u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
246 u"categories": [u'Science & Technology'],
2eb88d95 247 }
0e853ca4 248 },
0e853ca4
PH
249 {
250 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
251 u"file": u"UxxajLWwzqY.mp4",
252 u"note": u"Test generic use_cipher_signature video (#897)",
253 u"info_dict": {
254 u"upload_date": u"20120506",
255 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
ba60a3eb 256 u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
45ed795c 257 u"uploader": u"Icona Pop",
0e853ca4 258 u"uploader_id": u"IconaPop"
2eb88d95 259 }
c108eb73
JMF
260 },
261 {
262 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
263 u"file": u"07FYdnEawAQ.mp4",
264 u"note": u"Test VEVO video with age protection (#956)",
265 u"info_dict": {
266 u"upload_date": u"20130703",
267 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
268 u"description": u"md5:64249768eec3bc4276236606ea996373",
269 u"uploader": u"justintimberlakeVEVO",
270 u"uploader_id": u"justintimberlakeVEVO"
271 }
272 },
fccd3771 273 {
83aa5293 274 u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
fccd3771
PH
275 u"file": u"yZIXLfi8CZQ.mp4",
276 u"note": u"Embed-only video (#1746)",
277 u"info_dict": {
278 u"upload_date": u"20120608",
279 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
280 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
281 u"uploader": u"SET India",
282 u"uploader_id": u"setindia"
283 }
284 },
dd27fd17
PH
285 {
286 u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
287 u"file": u"a9LDPn-MO4I.m4a",
288 u"note": u"256k DASH audio (format 141) via DASH manifest",
dd27fd17
PH
289 u"info_dict": {
290 u"upload_date": "20121002",
291 u"uploader_id": "8KVIDEO",
292 u"description": "No description available.",
293 u"uploader": "8KVIDEO",
294 u"title": "UHDTV TEST 8K VIDEO.mp4"
4919603f
PH
295 },
296 u"params": {
297 u"youtube_include_dash_manifest": True,
298 u"format": "141",
299 },
dd27fd17 300 },
3489b7d2
JMF
301 # DASH manifest with encrypted signature
302 {
303 u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
304 u'info_dict': {
305 u'id': u'IB3lcPjvWLA',
306 u'ext': u'm4a',
307 u'title': u'Afrojack - The Spark ft. Spree Wilson',
e00c9cf5 308 u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
3489b7d2
JMF
309 u'uploader': u'AfrojackVEVO',
310 u'uploader_id': u'AfrojackVEVO',
311 u'upload_date': u'20131011',
312 },
313 u"params": {
314 u'youtube_include_dash_manifest': True,
315 u'format': '141',
316 },
317 },
2eb88d95
PH
318 ]
319
c5e8d7af
PH
320
@classmethod
def suitable(cls, url):
    """Return True when *url* matches _VALID_URL and is not claimed by
    YoutubePlaylistIE, False otherwise."""
    claimed_by_playlist = YoutubePlaylistIE.suitable(url)
    return (not claimed_by_playlist) and re.match(cls._VALID_URL, url) is not None
c5e8d7af 326
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature, keyed by (player_url, signature length)
    self._player_cache = dict()
e0df6211 330
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage is being downloaded."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
334
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a notice that video information is being extracted."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
338
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for the video."""
    self.to_screen(u'%s: Format %s not available' % (video_id, format))
342
def report_rtmp_download(self):
    """Print a notice that an RTMP download was detected."""
    message = u'RTMP download detected'
    self.to_screen(message)
346
def _extract_signature_function(self, video_id, player_url, slen):
    """Return a callable that deciphers signatures of length *slen*.

    The player type (file extension) and player id are parsed out of
    *player_url*.  When a cache directory is configured, the function is
    first looked up in ``<cache_dir>/youtube-sigfuncs/<func_id>.json``,
    which stores a list of source indices.  Otherwise the player is
    downloaded and parsed (``js`` or ``swf``), and the resulting function is
    written to the cache on a best-effort basis.

    Raises ExtractorError if the player URL cannot be parsed or the player
    type is neither ``js`` nor ``swf``.
    """
    id_m = re.match(
        r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
        player_url)
    if id_m is None:
        # Fail with a clear message instead of an AttributeError on None
        raise ExtractorError(u'Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%d' % (player_type, player_id, slen)
    # Sanity check: func_id is used as a file name and must not contain
    # any path component.
    assert os.path.basename(func_id) == func_id
    cache_dir = get_cachedir(self._downloader.params)

    cache_enabled = cache_dir is not None
    if cache_enabled:
        cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                u'youtube-sigfuncs',
                                func_id + '.json')
        try:
            with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                cache_spec = json.load(cachef)
            return lambda s: u''.join(s[i] for i in cache_spec)
        except (IOError, ValueError):
            # No cache available, or the cache file is unreadable/corrupt
            # (json.load reports malformed content as ValueError); fall
            # through and rebuild the function from the player.
            pass

    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # An explicit raise survives ``python -O``, unlike ``assert False``
        raise ExtractorError(u'Invalid player type %r' % player_type)

    if cache_enabled:
        try:
            # Feed a string whose i-th character has code point i, so the
            # code points of the output are the source indices.
            test_string = u''.join(map(compat_chr, range(slen)))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]
            try:
                os.makedirs(os.path.dirname(cache_fn))
            except OSError as ose:
                if ose.errno != errno.EEXIST:
                    raise
            write_json_file(cache_spec, cache_fn)
        except Exception:
            # Writing the cache is best-effort: warn and carry on
            tb = traceback.format_exc()
            self._downloader.report_warning(
                u'Writing cache to %r failed: %s' % (cache_fn, tb))

    return res
404
edf3e38e
PH
def _print_sig_code(self, func, slen):
    """Print Python source equivalent to *func* for signatures of length
    *slen*, expressed as a concatenation of index and slice expressions."""

    def _slice_expr(first, last, stride):
        # Render the run first..last (inclusive, walking by stride) as s[a:b:c]
        lower = u'' if first == 0 else str(first)
        stop = last + stride
        upper = (u':%d' % stop) if stop >= 0 else u':'
        tail = u'' if stride == 1 else (u':%d' % stride)
        return u's[%s%s%s]' % (lower, upper, tail)

    def _sig_exprs(idxs):
        step = None
        start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                # set as soon as step is set
        for prev, cur in zip(idxs[:-1], idxs[1:]):
            delta = cur - prev
            if step is not None:
                # Inside a run: emit it once the stride is broken
                if delta != step:
                    yield _slice_expr(start, prev, step)
                    step = None
                continue
            if delta in (-1, 1):
                step, start = delta, prev
            else:
                yield u's[%d]' % prev
        if step is None:
            yield u's[%d]' % cur
        else:
            yield _slice_expr(start, cur, step)

    # The i-th character of the probe has code point i, so the code points
    # of the result are the source indices.
    probe = u''.join(map(compat_chr, range(slen)))
    permutation = [ord(c) for c in func(probe)]
    expr_code = u' + '.join(_sig_exprs(permutation))
    code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature function:\n' + code)
edf3e38e 440
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable that runs the signature function found in the
    JavaScript player source *jscode* on a single string argument."""
    # Capture the whole identifier: JavaScript names may also contain digits
    # and underscores after the first character, and a partial match would
    # look up the wrong function.
    funcname = self._search_regex(
        r'signature=([$_a-zA-Z][$_a-zA-Z0-9]*)', jscode,
        u'Initial JS player signature function name')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
449
def _parse_sig_swf(self, file_contents):
    """Return a callable that runs the ``decipher`` function of the
    ``SignatureDecipher`` class from the SWF player on a single string."""
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(u'SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, u'decipher')

    def _apply(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return _apply
456
83799698 457 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 458 """Turn the encrypted s field into a working signature"""
6b37f0be 459
c8bf86d5
PH
460 if player_url is None:
461 raise ExtractorError(u'Cannot decrypt signature without player_url')
920de7a2 462
c8bf86d5
PH
463 if player_url.startswith(u'//'):
464 player_url = u'https:' + player_url
465 try:
466 player_id = (player_url, len(s))
467 if player_id not in self._player_cache:
468 func = self._extract_signature_function(
469 video_id, player_url, len(s)
470 )
471 self._player_cache[player_id] = func
472 func = self._player_cache[player_id]
473 if self._downloader.params.get('youtube_print_sig_code'):
474 self._print_sig_code(func, len(s))
475 return func(s)
476 except Exception as e:
477 tb = traceback.format_exc()
478 raise ExtractorError(
479 u'Automatic signature extraction failed: ' + tb, cause=e)
e0df6211 480
def _get_available_subtitles(self, video_id, webpage):
    """Return a dict mapping language codes to subtitle URLs.

    The track list is downloaded from the timedtext service; *webpage* is
    not used here.  An empty dict is returned, after a warning, when the
    list cannot be downloaded or contains no tracks.
    """
    try:
        track_listing = self._download_webpage(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        return {}

    sub_lang_list = {}
    tracks = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', track_listing)
    for track_name, lang in tracks:
        query = compat_urllib_parse.urlencode({
            'lang': lang,
            'v': video_id,
            'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
            'name': unescapeHTML(track_name).encode('utf-8'),
        })
        sub_lang_list[lang] = u'https://www.youtube.com/api/timedtext?' + query

    if sub_lang_list:
        return sub_lang_list
    self._downloader.report_warning(u'video doesn\'t have subtitles')
    return {}
506
055e6f36 507 def _get_available_automatic_caption(self, video_id, webpage):
de7f3446
JMF
508 """We need the webpage for getting the captions url, pass it as an
509 argument to speed up the process."""
ca715127 510 sub_format = self._downloader.params.get('subtitlesformat', 'srt')
de7f3446
JMF
511 self.to_screen(u'%s: Looking for automatic captions' % video_id)
512 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
055e6f36 513 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
de7f3446
JMF
514 if mobj is None:
515 self._downloader.report_warning(err_msg)
516 return {}
517 player_config = json.loads(mobj.group(1))
518 try:
519 args = player_config[u'args']
520 caption_url = args[u'ttsurl']
521 timestamp = args[u'timestamp']
055e6f36
JMF
522 # We get the available subtitles
523 list_params = compat_urllib_parse.urlencode({
524 'type': 'list',
525 'tlangs': 1,
526 'asrs': 1,
de7f3446 527 })
055e6f36 528 list_url = caption_url + '&' + list_params
e26f8712 529 caption_list = self._download_xml(list_url, video_id)
e3dc22ca 530 original_lang_node = caption_list.find('track')
f6a54188 531 if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
e3dc22ca
JMF
532 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
533 return {}
534 original_lang = original_lang_node.attrib['lang_code']
055e6f36
JMF
535
536 sub_lang_list = {}
537 for lang_node in caption_list.findall('target'):
538 sub_lang = lang_node.attrib['lang_code']
539 params = compat_urllib_parse.urlencode({
540 'lang': original_lang,
541 'tlang': sub_lang,
542 'fmt': sub_format,
543 'ts': timestamp,
544 'kind': 'asr',
545 })
546 sub_lang_list[sub_lang] = caption_url + '&' + params
547 return sub_lang_list
de7f3446
JMF
548 # An extractor error can be raise by the download process if there are
549 # no automatic captions but there are subtitles
550 except (KeyError, ExtractorError):
551 self._downloader.report_warning(err_msg)
552 return {}
553
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video ID (second group of _VALID_URL) found in *url*.

    Raises ExtractorError when *url* does not match _VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError(u'Invalid URL: %s' % url)
561
1d043b93
JMF
562 def _extract_from_m3u8(self, manifest_url, video_id):
563 url_map = {}
564 def _get_urls(_manifest):
565 lines = _manifest.split('\n')
566 urls = filter(lambda l: l and not l.startswith('#'),
567 lines)
568 return urls
569 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
570 formats_urls = _get_urls(manifest)
571 for format_url in formats_urls:
890f62e8 572 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1d043b93
JMF
573 url_map[itag] = format_url
574 return url_map
575
1fb07d10
JG
576 def _extract_annotations(self, video_id):
577 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
578 return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
579
c5e8d7af 580 def _real_extract(self, url):
7e8c0af0
PH
581 proto = (
582 u'http' if self._downloader.params.get('prefer_insecure', False)
583 else u'https')
584
c5e8d7af
PH
585 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
586 mobj = re.search(self._NEXT_URL_RE, url)
587 if mobj:
7e8c0af0 588 url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
97665381 589 video_id = self.extract_id(url)
c5e8d7af
PH
590
591 # Get video webpage
7e8c0af0 592 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
336c3a69 593 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
594
595 # Attempt to extract SWF player URL
e0df6211 596 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
597 if mobj is not None:
598 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
599 else:
600 player_url = None
601
602 # Get video info
603 self.report_video_info_webpage_download(video_id)
c108eb73
JMF
604 if re.search(r'player-age-gate-content">', video_webpage) is not None:
605 self.report_age_confirmation()
606 age_gate = True
607 # We simulate the access to the video from www.youtube.com/v/{video_id}
608 # this can be viewed without login into Youtube
2c57c7fa
JMF
609 data = compat_urllib_parse.urlencode({
610 'video_id': video_id,
611 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
612 'sts':'16268',
613 })
7e8c0af0 614 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
c5e8d7af
PH
615 video_info_webpage = self._download_webpage(video_info_url, video_id,
616 note=False,
617 errnote='unable to download video info webpage')
618 video_info = compat_parse_qs(video_info_webpage)
c108eb73
JMF
619 else:
620 age_gate = False
621 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
7e8c0af0 622 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
c108eb73
JMF
623 % (video_id, el_type))
624 video_info_webpage = self._download_webpage(video_info_url, video_id,
625 note=False,
626 errnote='unable to download video info webpage')
627 video_info = compat_parse_qs(video_info_webpage)
628 if 'token' in video_info:
629 break
c5e8d7af
PH
630 if 'token' not in video_info:
631 if 'reason' in video_info:
d11271dd
PH
632 raise ExtractorError(
633 u'YouTube said: %s' % video_info['reason'][0],
634 expected=True, video_id=video_id)
c5e8d7af 635 else:
d11271dd
PH
636 raise ExtractorError(
637 u'"token" parameter not in video info for unknown reason',
638 video_id=video_id)
c5e8d7af 639
1d699755
PH
640 if 'view_count' in video_info:
641 view_count = int(video_info['view_count'][0])
642 else:
643 view_count = None
644
c5e8d7af
PH
645 # Check for "rental" videos
646 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
647 raise ExtractorError(u'"rental" videos not supported')
648
649 # Start extracting information
650 self.report_information_extraction(video_id)
651
652 # uploader
653 if 'author' not in video_info:
654 raise ExtractorError(u'Unable to extract uploader name')
655 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
656
657 # uploader_id
658 video_uploader_id = None
659 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
660 if mobj is not None:
661 video_uploader_id = mobj.group(1)
662 else:
663 self._downloader.report_warning(u'unable to extract uploader nickname')
664
665 # title
a8c6b241 666 if 'title' in video_info:
aa92f063 667 video_title = video_info['title'][0]
a8c6b241
PH
668 else:
669 self._downloader.report_warning(u'Unable to extract video title')
670 video_title = u'_'
c5e8d7af
PH
671
672 # thumbnail image
7763b04e
JMF
673 # We try first to get a high quality image:
674 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
675 video_webpage, re.DOTALL)
676 if m_thumb is not None:
677 video_thumbnail = m_thumb.group(1)
678 elif 'thumbnail_url' not in video_info:
c5e8d7af 679 self._downloader.report_warning(u'unable to extract video thumbnail')
f490e77e 680 video_thumbnail = None
c5e8d7af
PH
681 else: # don't panic if we can't find it
682 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
683
684 # upload date
685 upload_date = None
ad3bc6ac 686 mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
beee53de
PH
687 if mobj is None:
688 mobj = re.search(
263bd4ec 689 r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
beee53de 690 video_webpage)
c5e8d7af
PH
691 if mobj is not None:
692 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
693 upload_date = unified_strdate(upload_date)
694
ec8deefc
DG
695 m_cat_container = get_element_by_id("eow-category", video_webpage)
696 if m_cat_container:
ad3bc6ac 697 category = self._html_search_regex(
01ed5c9b 698 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
ad3bc6ac
PH
699 default=None)
700 video_categories = None if category is None else [category]
701 else:
702 video_categories = None
ec8deefc 703
c5e8d7af
PH
704 # description
705 video_description = get_element_by_id("eow-description", video_webpage)
706 if video_description:
27dcce19
PH
707 video_description = re.sub(r'''(?x)
708 <a\s+
709 (?:[a-zA-Z-]+="[^"]+"\s+)*?
710 title="([^"]+)"\s+
711 (?:[a-zA-Z-]+="[^"]+"\s+)*?
712 class="yt-uix-redirect-link"\s*>
713 [^<]+
714 </a>
715 ''', r'\1', video_description)
c5e8d7af
PH
716 video_description = clean_html(video_description)
717 else:
718 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
719 if fd_mobj:
720 video_description = unescapeHTML(fd_mobj.group(1))
721 else:
722 video_description = u''
723
336c3a69 724 def _extract_count(klass):
46374a56
PH
725 count = self._search_regex(
726 r'class="%s">([\d,]+)</span>' % re.escape(klass),
727 video_webpage, klass, default=None)
336c3a69
JMF
728 if count is not None:
729 return int(count.replace(',', ''))
730 return None
731 like_count = _extract_count(u'likes-count')
732 dislike_count = _extract_count(u'dislikes-count')
733
c5e8d7af 734 # subtitles
d82134c3 735 video_subtitles = self.extract_subtitles(video_id, video_webpage)
c5e8d7af 736
c5e8d7af 737 if self._downloader.params.get('listsubtitles', False):
d665f8d3 738 self._list_available_subtitles(video_id, video_webpage)
c5e8d7af
PH
739 return
740
741 if 'length_seconds' not in video_info:
742 self._downloader.report_warning(u'unable to extract video duration')
b466b702 743 video_duration = None
c5e8d7af 744 else:
b466b702 745 video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
c5e8d7af 746
1fb07d10
JG
747 # annotations
748 video_annotations = None
749 if self._downloader.params.get('writeannotations', False):
750 video_annotations = self._extract_annotations(video_id)
751
c5e8d7af 752 # Decide which formats to download
c5e8d7af 753 try:
ae7ed920 754 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
50be92c1
PH
755 if not mobj:
756 raise ValueError('Could not find vevo ID')
ae7ed920
PH
757 json_code = uppercase_escape(mobj.group(1))
758 ytplayer_config = json.loads(json_code)
3489b7d2 759 args = ytplayer_config['args']
7ce7e394
JMF
760 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
761 # this signatures are encrypted
44d46655 762 if 'url_encoded_fmt_stream_map' not in args:
f10503db 763 raise ValueError(u'No stream_map present') # caught below
00fe14fc
JMF
764 re_signature = re.compile(r'[&,]s=')
765 m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
7ce7e394
JMF
766 if m_s is not None:
767 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
c5e8d7af 768 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
00fe14fc 769 m_s = re_signature.search(args.get('adaptive_fmts', u''))
b7a68384 770 if m_s is not None:
00fe14fc
JMF
771 if 'adaptive_fmts' in video_info:
772 video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
37b6d5f6 773 else:
00fe14fc 774 video_info['adaptive_fmts'] = [args['adaptive_fmts']]
c5e8d7af
PH
775 except ValueError:
776 pass
777
dd27fd17
PH
778 def _map_to_format_list(urlmap):
779 formats = []
780 for itag, video_real_url in urlmap.items():
781 dct = {
782 'format_id': itag,
783 'url': video_real_url,
784 'player_url': player_url,
785 }
0b65e5d4
PH
786 if itag in self._formats:
787 dct.update(self._formats[itag])
dd27fd17
PH
788 formats.append(dct)
789 return formats
790
c5e8d7af
PH
791 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
792 self.report_rtmp_download()
dd27fd17
PH
793 formats = [{
794 'format_id': '_rtmp',
795 'protocol': 'rtmp',
796 'url': video_info['conn'][0],
797 'player_url': player_url,
798 }]
00fe14fc
JMF
799 elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
800 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
801 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 802 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
c5e8d7af 803 url_map = {}
00fe14fc 804 for url_data_str in encoded_url_map.split(','):
c5e8d7af
PH
805 url_data = compat_parse_qs(url_data_str)
806 if 'itag' in url_data and 'url' in url_data:
807 url = url_data['url'][0]
808 if 'sig' in url_data:
809 url += '&signature=' + url_data['sig'][0]
810 elif 's' in url_data:
e0df6211 811 encrypted_sig = url_data['s'][0]
cf010131
PH
812
813 if not age_gate:
814 jsplayer_url_json = self._search_regex(
815 r'"assets":.+?"js":\s*("[^"]+")',
816 video_webpage, u'JS player URL')
817 player_url = json.loads(jsplayer_url_json)
818 if player_url is None:
819 player_url_json = self._search_regex(
820 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
821 video_webpage, u'age gate player URL')
822 player_url = json.loads(player_url_json)
823
769fda3c 824 if self._downloader.params.get('verbose'):
cf010131
PH
825 if player_url is None:
826 player_version = 'unknown'
827 player_desc = 'unknown'
828 else:
829 if player_url.endswith('swf'):
bdde940e 830 player_version = self._search_regex(
b8c74d60 831 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
bdde940e 832 u'flash player', fatal=False)
cf010131
PH
833 player_desc = 'flash player %s' % player_version
834 else:
835 player_version = self._search_regex(
836 r'html5player-(.+?)\.js', video_webpage,
837 'html5 player', fatal=False)
838 player_desc = u'html5 player %s' % player_version
e0df6211
PH
839
840 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
5a76c651 841 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
e0df6211
PH
842 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
843
83799698
PH
844 signature = self._decrypt_signature(
845 encrypted_sig, video_id, player_url, age_gate)
c5e8d7af
PH
846 url += '&signature=' + signature
847 if 'ratebypass' not in url:
848 url += '&ratebypass=yes'
849 url_map[url_data['itag'][0]] = url
dd27fd17 850 formats = _map_to_format_list(url_map)
1d043b93
JMF
851 elif video_info.get('hlsvp'):
852 manifest_url = video_info['hlsvp'][0]
853 url_map = self._extract_from_m3u8(manifest_url, video_id)
dd27fd17 854 formats = _map_to_format_list(url_map)
c5e8d7af 855 else:
9abb3204 856 raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 857
dd27fd17 858 # Look for the DASH manifest
d68f0cdb 859 if (self._downloader.params.get('youtube_include_dash_manifest', False)):
dd27fd17 860 try:
d68f0cdb 861 # The DASH manifest used needs to be the one from the original video_webpage.
862 # The one found in get_video_info seems to be using different signatures.
863 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
864 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
865 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
866 if age_gate:
3489b7d2 867 dash_manifest_url = video_info.get('dashmpd')[0]
d68f0cdb 868 else:
3489b7d2 869 dash_manifest_url = ytplayer_config['args']['dashmpd']
d68f0cdb 870 def decrypt_sig(mobj):
871 s = mobj.group(1)
872 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
873 return '/signature/%s' % dec_s
874 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
dd27fd17 875 dash_doc = self._download_xml(
d68f0cdb 876 dash_manifest_url, video_id,
dd27fd17
PH
877 note=u'Downloading DASH manifest',
878 errnote=u'Could not download DASH manifest')
879 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
880 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
881 if url_el is None:
882 continue
883 format_id = r.attrib['id']
884 video_url = url_el.text
885 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
886 f = {
887 'format_id': format_id,
888 'url': video_url,
889 'width': int_or_none(r.attrib.get('width')),
890 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
891 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
892 'filesize': filesize,
893 }
894 try:
895 existing_format = next(
896 fo for fo in formats
897 if fo['format_id'] == format_id)
898 except StopIteration:
899 f.update(self._formats.get(format_id, {}))
900 formats.append(f)
901 else:
902 existing_format.update(f)
903
904 except (ExtractorError, KeyError) as e:
905 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
d80044c2 906
4bcc7bd1 907 self._sort_formats(formats)
4ea3be0a 908
909 return {
910 'id': video_id,
911 'uploader': video_uploader,
912 'uploader_id': video_uploader_id,
913 'upload_date': upload_date,
914 'title': video_title,
915 'thumbnail': video_thumbnail,
916 'description': video_description,
ec8deefc 917 'categories': video_categories,
4ea3be0a 918 'subtitles': video_subtitles,
919 'duration': video_duration,
920 'age_limit': 18 if age_gate else 0,
921 'annotations': video_annotations,
7e8c0af0 922 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
4ea3be0a 923 'view_count': view_count,
924 'like_count': like_count,
925 'dislike_count': dislike_count,
926 'formats': formats,
927 }
c5e8d7af 928
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube playlists given as a URL or a bare playlist id."""

    IE_NAME = u'youtube:playlist'
    IE_DESC = u'YouTube.com playlists'
    # Group 1 captures the id found in a youtube.com URL, group 2 a bare
    # id carrying one of the known prefixes.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _ids_to_results(self, ids):
        """Wrap every video id in a url_result handled by the 'Youtube' extractor."""
        results = []
        for vid_id in ids:
            results.append(self.url_result(vid_id, 'Youtube', video_id=vid_id))
        return results

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix (an 'RD'-prefixed playlist id)."""
        # A mix is generated from a single video: the last 11 characters
        # of the playlist id are used as that video's id.
        mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (
            playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            mix_url, playlist_id, u'Downloading Youtube mix')

        # Try the candidate title classes in order and keep the first
        # non-empty hit (or the last lookup's value when none matches).
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break
        title = clean_html(title_span)

        video_re = r'''(?x)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        found_ids = re.findall(video_re, webpage, flags=re.DOTALL)
        entries = self._ids_to_results(orderedSet(found_ids))

        return self.playlist_result(entries, playlist_id, title)

    def _real_extract(self, url):
        """Return a playlist result (or a single video result) for ``url``.

        Raises ExtractorError when the URL does not match, when the id
        denotes a top list, or when the playlist page reports that the
        playlist does not exist or is private.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        # A "v" query parameter means the URL also points at one video.
        query_dict = compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes need their own extraction routine.
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        page = self._download_webpage(
            self._TEMPLATE_URL % playlist_id, playlist_id)

        # Bail out early when the page carries the missing/private alert.
        unavailable = re.search(
            r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>',
            page)
        if unavailable:
            raise ExtractorError(
                u'The playlist doesn\'t exist or is private, use --username or '
                '--netrc to access it.',
                expected=True)

        # Walk the listing: the first pass scans the playlist page itself,
        # later passes scan the HTML fragments of each "load more" reply.
        ids = []
        content_html = more_widget_html = page
        page_num = 0
        while True:
            page_num += 1
            # Drop duplicates and the link with index 0 (it is not the
            # first video of the playlist).
            ids.extend(orderedSet(
                m.group('id')
                for m in re.finditer(self._VIDEO_RE, content_html)
                if m.group('index') != '0'))

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                more_widget_html)
            if not more_match:
                break

            more = self._download_json(
                'https://youtube.com/%s' % more_match.group('more'),
                playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, u'title')

        return self.playlist_result(
            self._ids_to_results(ids), playlist_id, playlist_title)
c5e8d7af
PH
1039
1040
0a688bc0
JMF
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extractor for the "yttoplist:{channel}:{list title}" pseudo-URL."""

    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'

    def _real_extract(self, url):
        """Find the list link on the channel page and return its videos."""
        parts = re.match(self._VALID_URL, url)
        channel, title = parts.group('chann'), parts.group('title')

        # The list link on the channel page is recognised by its
        # url-encoded "title=..." query fragment.
        encoded_title = compat_urllib_parse.urlencode({'title': title})
        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(encoded_title)
        channel_page = self._download_webpage(
            'https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(playlist_re, channel_page, u'list')
        list_url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        # Sometimes the webpage does not contain the videos; keep
        # re-downloading until at least one id is found.
        # NOTE(review): this loop has no upper bound on retries, and the
        # progress note says "mix" although this is a top list.
        attempt = 0
        while True:
            note = u'Downloading Youtube mix'
            if attempt > 0:
                note += ', retry #%d' % attempt
            webpage = self._download_webpage(list_url, title, note)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
            attempt += 1

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=title)
1071
1072
class YoutubeChannelIE(InfoExtractor):
    """Extractor for YouTube /channel/<id> URLs."""

    IE_NAME = u'youtube:channel'
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'

    def extract_videos_from_page(self, page):
        """Return the distinct video ids linked from ``page``, in page order."""
        unique_ids = []
        for candidate in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if candidate in unique_ids:
                continue
            unique_ids.append(candidate)
        return unique_ids

    def _real_extract(self, url):
        """Collect the channel's video ids and return them as a playlist.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = url_match.group(1)

        channel_page = self._download_webpage(
            'https://www.youtube.com/channel/%s/videos' % channel_id,
            channel_id)
        autogenerated_label = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_label is not None:
            # The videos are contained in a single page; the ajax pages
            # cannot be used because they are empty.
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Page through the json-based channel_ajax query until the
            # "load more" indicator is no longer present.
            video_ids = []
            pagenum = 0
            while True:
                pagenum += 1
                page = self._download_json(
                    self._MORE_PAGES_URL % (pagenum, channel_id),
                    channel_id,
                    note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                video_ids.extend(
                    self.extract_videos_from_page(page['content_html']))

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = []
        for video_id in video_ids:
            url_entries.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(url_entries, channel_id)
c5e8d7af
PH
1127
1128
class YoutubeUserIE(InfoExtractor):
    """Extractor for a user's uploads (user URL or "ytuser:" keyword)."""

    IE_NAME = u'youtube:user'
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when no other module-level ``*IE`` claims it."""
        # This extractor's pattern is very permissive, so every other
        # extractor defined in this module gets the first say.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        username = url_match.group(1)

        # The API returns at most _GDATA_PAGE_SIZE results per query, so
        # results are fetched one page at a time. A page whose feed has
        # no "entry" key yields nothing.
        def download_page(pagenum):
            page_size = self._GDATA_PAGE_SIZE
            start_index = pagenum * page_size + 1
            note = u'Downloading video ids from %d to %d' % (
                start_index, start_index + page_size)
            raw_page = self._download_webpage(
                self._GDATA_URL % (username, page_size, start_index),
                username, note)

            try:
                response = json.loads(raw_page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            feed = response['feed']
            if 'entry' not in feed:
                return

            for entry in feed['entry']:
                title = entry['title']['$t']
                # The video id is the last path component of the entry id.
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }

        paged_entries = PagedList(download_page, self._GDATA_PAGE_SIZE)
        return self.playlist_result(paged_entries, playlist_title=username)
1189
b05654f0
PH
1190
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor bound to the 'ytsearch' key."""

    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    _MAX_RESULTS = 1000
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        PAGE_SIZE = 50
        collected = []
        # Starts at the requested count; narrowed after each page to the
        # total number of items the API reports.
        limit = n
        pagenum = 0

        while PAGE_SIZE * pagenum < limit:
            start_index = PAGE_SIZE * pagenum + 1
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                start_index)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
                errnote=u'Unable to download API page')
            api_response = json.loads(data_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            collected.extend([item['id'] for item in api_response['items']])

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may have pushed the total past the requested count.
        if len(collected) > n:
            collected = collected[:n]

        videos = []
        for video_id in collected:
            videos.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(videos, query)
75dff0ee 1232
c9ae7b95 1233
class YoutubeSearchDateIE(YoutubeSearchIE):
    """YoutubeSearchIE variant whose API URL adds ``orderby=published``."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = u'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
75dff0ee 1239
c9ae7b95
PH
1240
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for youtube.com/results?search_query=... URLs."""

    IE_NAME = u'youtube:search_url'
    IE_DESC = u'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        """Scrape the results page and return its items as a playlist dict."""
        url_match = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(url_match.group('query'))

        webpage = self._download_webpage(url, query)
        # Only the first "item-section" list is searched for entries.
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')

        entries = []
        for part_code in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            # The title lookup is non-fatal; the URL lookup is not.
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'],
                part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            entry = {
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', part_url_snippet),
                'title': part_title,
            }
            entries.append(entry)

        playlist = {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
        return playlist
1275
1276
class YoutubeShowIE(InfoExtractor):
    """Extractor for youtube.com/show/<name> pages."""

    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Return one 'YoutubePlaylist' url_result per playlist link found."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There is one playlist for each season of the show.
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        results = []
        for season_path in season_paths:
            results.append(self.url_result(
                'https://www.youtube.com' + season_path, 'YoutubePlaylist'))
        return results
04cc9617
JMF
1290
1291
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # When True, action_load_personal_feed is used instead of
    # action_load_system_feed.
    _PERSONAL_FEED = False

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return u'youtube:%s' % self._FEED_NAME

    @property
    def _FEED_TEMPLATE(self):
        """feed_ajax URL with one ``%s`` slot left for the paging value."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _real_extract(self, url):
        """Page through the feed and return all linked videos as a playlist."""
        feed_entries = []
        # Paging value for the next request: 0 first, then whatever the
        # previous reply's "load more" link carries.
        paging = 0
        page_num = 0
        while True:
            page_num += 1
            info = self._download_json(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_num)
            feed_html = info.get('feed_html') or info.get('content_html')

            page_ids = orderedSet(
                re.findall(r'"/watch\?v=(.*?)["&]', feed_html))
            for video_id in page_ids:
                feed_entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))

            next_page = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                feed_html)
            if next_page is None:
                break
            paging = next_page.group('paging')

        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1336
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1342
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
c626a3d9 1348
43ba5456
JMF
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the personal 'watch_later' feed."""

    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
c626a3d9 1355
f459d170
JMF
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the personal 'history' feed."""

    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string, like the other feed extractors in this file: the
    # pattern contains ``\.``, which is not a valid escape sequence in
    # a non-raw literal and triggers a warning on newer Python
    # versions. The resulting pattern text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1362
c626a3d9
JMF
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites page ("ytfav" keyword)."""

    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Hand the favourites playlist id over to 'YoutubePlaylist'."""
        # The fixed my_favorites page is always fetched, whatever ``url``
        # was; the playlist id is read from the first "list=" in it.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
15870e90
PH
1373
1374
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs lacking a video id and reports an error."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+
        )?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising to quote the URL."""
        raise ExtractorError(
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            u' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)