3 from __future__
import unicode_literals
13 from .common
import InfoExtractor
, SearchInfoExtractor
14 from ..jsinterp
import JSInterpreter
15 from ..swfinterp
import SWFInterpreter
16 from ..compat
import (
20 compat_urllib_parse_unquote
,
21 compat_urllib_parse_unquote_plus
,
22 compat_urllib_parse_urlparse
,
23 compat_urllib_request
,
31 get_element_by_attribute
,
46 class YoutubeBaseInfoExtractor(InfoExtractor
):
47 """Provide base functions for Youtube extractors"""
48 _LOGIN_URL
= 'https://accounts.google.com/ServiceLogin'
49 _TWOFACTOR_URL
= 'https://accounts.google.com/SecondFactor'
50 _NETRC_MACHINE
= 'youtube'
51 # If True it will raise an error if no login info is provided
52 _LOGIN_REQUIRED
= False
54 def _set_language(self
):
56 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
57 # YouTube sets the expire time to about two months
58 expire_time
=time
.time() + 2 * 30 * 24 * 3600)
60 def _ids_to_results(self
, ids
):
62 self
.url_result(vid_id
, 'Youtube', video_id
=vid_id
)
67 Attempt to log in to YouTube.
68 True is returned if successful or skipped.
69 False is returned if login failed.
71 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
73 (username
, password
) = self
._get
_login
_info
()
74 # No authentication to be performed
76 if self
._LOGIN
_REQUIRED
:
77 raise ExtractorError('No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True)
80 login_page
= self
._download
_webpage
(
81 self
._LOGIN
_URL
, None,
82 note
='Downloading login page',
83 errnote
='unable to fetch login page', fatal
=False)
84 if login_page
is False:
87 galx
= self
._search
_regex
(r
'(?s)<input.+?name="GALX".+?value="(.+?)"',
88 login_page
, 'Login GALX parameter')
92 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
97 'PersistentCookie': 'yes',
99 'bgresponse': 'js_disabled',
100 'checkConnection': '',
101 'checkedDomains': 'youtube',
108 'service': 'youtube',
113 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
115 login_form
= dict((k
.encode('utf-8'), v
.encode('utf-8')) for k
, v
in login_form_strs
.items())
116 login_data
= compat_urllib_parse
.urlencode(login_form
).encode('ascii')
118 req
= compat_urllib_request
.Request(self
._LOGIN
_URL
, login_data
)
119 login_results
= self
._download
_webpage
(
121 note
='Logging in', errnote
='unable to log in', fatal
=False)
122 if login_results
is False:
125 if re
.search(r
'id="errormsg_0_Passwd"', login_results
) is not None:
126 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected
=True)
129 # TODO add SMS and phone call support - these require making a request and then prompting the user
131 if re
.search(r
'(?i)<form[^>]* id="gaia_secondfactorform"', login_results
) is not None:
132 tfa_code
= self
._get
_tfa
_info
()
135 self
._downloader
.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
136 self
._downloader
.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
139 # Unlike the first login form, secTok and timeStmp are both required for the TFA form
141 match
= re
.search(r
'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
143 self._downloader.report_warning('Failed to get secTok
- did the page structure change?
')
144 secTok = match.group(1)
145 match = re.search(r'id="timeStmp"\n\s
+value
=\'(.+)\'/>', login_results, re.M | re.U)
147 self._downloader.report_warning('Failed to get timeStmp
- did the page structure change?
')
148 timeStmp = match.group(1)
151 'continue': 'https
://www
.youtube
.com
/signin?action_handle_signin
=true
&feature
=sign_in_button
&hl
=en_US
&nomobiletemp
=1',
153 'smsUserPin
': tfa_code,
154 'smsVerifyPin
': 'Verify
',
156 'PersistentCookie
': 'yes
',
157 'checkConnection
': '',
158 'checkedDomains
': 'youtube
',
161 'timeStmp
': timeStmp,
162 'service
': 'youtube
',
165 tfa_form = dict((k.encode('utf
-8'), v.encode('utf
-8')) for k, v in tfa_form_strs.items())
166 tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii
')
168 tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
169 tfa_results = self._download_webpage(
171 note='Submitting TFA code
', errnote='unable to submit tfa
', fatal=False)
173 if tfa_results is False:
176 if re.search(r'(?i
)<form
[^
>]* id="gaia_secondfactorform"', tfa_results) is not None:
177 self._downloader.report_warning('Two
-factor code expired
. Please
try again
, or use a one
-use backup code instead
.')
179 if re.search(r'(?i
)<form
[^
>]* id="gaia_loginform"', tfa_results) is not None:
180 self._downloader.report_warning('unable to log
in - did the page structure change?
')
182 if re.search(r'smsauth
-interstitial
-reviewsettings
', tfa_results) is not None:
183 self._downloader.report_warning('Your Google account has a security notice
. Please log
in on your web browser
, resolve the notice
, and try again
.')
186 if re.search(r'(?i
)<form
[^
>]* id="gaia_loginform"', login_results) is not None:
187 self._downloader.report_warning('unable to log
in: bad username
or password
')
191 def _real_initialize(self):
192 if self._downloader is None:
195 if not self._login():
199 class YoutubeIE(YoutubeBaseInfoExtractor):
200 IE_DESC = 'YouTube
.com
'
201 _VALID_URL = r"""(?x)^
203 (?:https?://|//) # http(s):// or protocol-independent URL
204 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
205 (?:www\.)?deturl\.com/www\.youtube\.com/|
206 (?:www\.)?pwnyoutube\.com/|
207 (?:www\.)?yourepeat\.com/|
208 tube\.majestyc\.net/|
209 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
210 (?:.*?\#/)? # handle anchor (#/) redirect urls
211 (?: # the various things that can precede the ID:
212 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
213 |(?: # or the v= param in all its forms
214 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
215 (?:\?|\#!?) # the params delimiter ? or # or #!
216 (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
220 |youtu\.be/ # just youtu.be/xxxx
221 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
223 )? # all until now is optional -> you can pass the naked ID
224 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
225 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
226 (?(1).+)? # if we found the ID, everything can follow
228 _NEXT_URL_RE = r'[\?&]next_url
=([^
&]+)'
230 '5': {'ext': 'flv', 'width': 400, 'height': 240},
231 '6': {'ext': 'flv', 'width': 450, 'height': 270},
232 '13': {'ext': '3gp'},
233 '17': {'ext': '3gp', 'width': 176, 'height': 144},
234 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
235 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
236 '34': {'ext': 'flv', 'width': 640, 'height': 360},
237 '35': {'ext': 'flv', 'width': 854, 'height': 480},
238 '36': {'ext': '3gp', 'width': 320, 'height': 240},
239 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
240 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
241 '43': {'ext': 'webm', 'width': 640, 'height': 360},
242 '44': {'ext': 'webm', 'width': 854, 'height': 480},
243 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
244 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
245 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
246 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
250 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
251 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
252 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
253 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
254 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
255 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
256 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
258 # Apple HTTP Live Streaming
259 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
260 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
261 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
262 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
263 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
264 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
265 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
268 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
269 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
270 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
271 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
272 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
273 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
274 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
275 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
276 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
277 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
278 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
281 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
282 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
283 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
286 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
287 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
288 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
289 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
290 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
291 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
292 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
293 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
294 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
295 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
296 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
297 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
298 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
299 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
300 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
301 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
302 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
303 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
304 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
305 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
306 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
309 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
310 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
312 # Dash webm audio with opus inside
313 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
314 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
315 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
318 '_rtmp
': {'protocol': 'rtmp'},
324 'url
': 'http
://www
.youtube
.com
/watch?v
=BaW_jenozKcj
&t
=1s
&end
=9',
328 'title
': 'youtube
-dl test video
"\'/\\ä↭𝕐',
329 'uploader': 'Philipp Hagemeister',
330 'uploader_id': 'phihag',
331 'upload_date': '20121002',
332 'description': 'test chars: "\'/\\ä↭𝕐
\ntest URL
: https
://github
.com
/rg3
/youtube
-dl
/issues
/1892\n\nThis
is a test video
for youtube
-dl
.\n\nFor more information
, contact phihag
@phihag.de .',
333 'categories
': ['Science
& Technology
'],
334 'tags
': ['youtube
-dl
'],
336 'dislike_count
': int,
342 'url
': 'http
://www
.youtube
.com
/watch?v
=UxxajLWwzqY
',
343 'note
': 'Test generic use_cipher_signature
video (#897)',
347 'upload_date': '20120506',
348 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
349 'description': 'md5:782e8651347686cba06e58f71ab51773',
350 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
351 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
352 'iconic ep', 'iconic', 'love', 'it'],
353 'uploader': 'Icona Pop',
354 'uploader_id': 'IconaPop',
358 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
359 'note': 'Test VEVO video with age protection (#956)',
363 'upload_date': '20130703',
364 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
365 'description': 'md5:64249768eec3bc4276236606ea996373',
366 'uploader': 'justintimberlakeVEVO',
367 'uploader_id': 'justintimberlakeVEVO',
372 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
373 'note': 'Embed-only video (#1746)',
377 'upload_date': '20120608',
378 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
379 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
380 'uploader': 'SET India',
381 'uploader_id': 'setindia'
385 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
386 'note': 'Use the first video ID in the URL',
390 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
391 'uploader': 'Philipp Hagemeister',
392 'uploader_id': 'phihag',
393 'upload_date': '20121002',
394 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
395 'categories': ['Science & Technology'],
396 'tags': ['youtube-dl'],
398 'dislike_count': int,
401 'skip_download': True,
405 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
406 'note': '256k DASH audio (format 141) via DASH manifest',
410 'upload_date': '20121002',
411 'uploader_id': '8KVIDEO',
413 'uploader': '8KVIDEO',
414 'title': 'UHDTV TEST 8K VIDEO.mp4'
417 'youtube_include_dash_manifest': True,
421 # DASH manifest with encrypted signature
423 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
427 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
428 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
429 'uploader': 'AfrojackVEVO',
430 'uploader_id': 'AfrojackVEVO',
431 'upload_date': '20131011',
434 'youtube_include_dash_manifest': True,
438 # JS player signature function name containing $
440 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
444 'title': 'Taylor Swift - Shake It Off',
445 'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
446 'uploader': 'TaylorSwiftVEVO',
447 'uploader_id': 'TaylorSwiftVEVO',
448 'upload_date': '20140818',
451 'youtube_include_dash_manifest': True,
457 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
461 'upload_date': '20100909',
462 'uploader': 'The Amazing Atheist',
463 'uploader_id': 'TheAmazingAtheist',
464 'title': 'Burning Everyone\'s Koran',
465 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
468 # Normal age-gate video (No vevo, embed allowed)
470 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
474 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
475 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
476 'uploader': 'The Witcher',
477 'uploader_id': 'WitcherGame',
478 'upload_date': '20140605',
482 # Age-gate video with encrypted signature
484 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
488 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
489 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
490 'uploader': 'LloydVEVO',
491 'uploader_id': 'LloydVEVO',
492 'upload_date': '20110629',
496 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
498 'url': '__2ABJjxzNo',
502 'upload_date': '20100430',
503 'uploader_id': 'deadmau5',
504 'description': 'md5:12c56784b8032162bb936a5f76d55360',
505 'uploader': 'deadmau5',
506 'title': 'Deadmau5 - Some Chords (HD)',
508 'expected_warnings': [
509 'DASH manifest missing',
512 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
514 'url': 'lqQg6PlCWgI',
518 'upload_date': '20120731',
519 'uploader_id': 'olympic',
520 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
521 'uploader': 'Olympics',
522 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
525 'skip_download': 'requires avconv',
530 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
534 'stretched_ratio': 16 / 9.,
535 'upload_date': '20110310',
536 'uploader_id': 'AllenMeow',
537 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
539 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
542 # url_encoded_fmt_stream_map is empty string
544 'url': 'qEJwOuvDf7I',
548 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
550 'upload_date': '20150404',
551 'uploader_id': 'spbelect',
552 'uploader': 'Наблюдатели Петербурга',
555 'skip_download': 'requires avconv',
558 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
560 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
564 'title': 'md5:7b81415841e02ecd4313668cde88737a',
565 'description': 'md5:116377fd2963b81ec4ce64b542173306',
566 'upload_date': '20150625',
567 'uploader_id': 'dorappi2000',
568 'uploader': 'dorappi2000',
569 'formats': 'mincount:33',
572 # DASH manifest with segment_list
574 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
575 'md5': '8ce563a1d667b599d21064e982ab9e31',
579 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
580 'uploader': 'Airtek',
581 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
582 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
583 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
586 'youtube_include_dash_manifest': True,
587 'format': '135', # bestvideo
591 # Multifeed videos (multiple cameras), URL is for Main Camera
592 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
595 'title': 'teamPGP: Rocket League Noob Stream',
596 'description': 'md5:dc7872fb300e143831327f1bae3af010',
602 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
603 'description': 'md5:dc7872fb300e143831327f1bae3af010',
604 'upload_date': '20150721',
605 'uploader': 'Beer Games Beer',
606 'uploader_id': 'beergamesbeer',
612 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
613 'description': 'md5:dc7872fb300e143831327f1bae3af010',
614 'upload_date': '20150721',
615 'uploader': 'Beer Games Beer',
616 'uploader_id': 'beergamesbeer',
622 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
623 'description': 'md5:dc7872fb300e143831327f1bae3af010',
624 'upload_date': '20150721',
625 'uploader': 'Beer Games Beer',
626 'uploader_id': 'beergamesbeer',
632 'title': 'teamPGP: Rocket League Noob Stream (zim)',
633 'description': 'md5:dc7872fb300e143831327f1bae3af010',
634 'upload_date': '20150721',
635 'uploader': 'Beer Games Beer',
636 'uploader_id': 'beergamesbeer',
640 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initialiser.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature; keys are
    # (player_url, signature cache id) tuples, values are callables.
    self._player_cache = {}
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage for *video_id* is being fetched."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Announce that information for *video_id* is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Report that a format is not available for a video.

    The previous docstring ("Report extracted video URL.") described a
    different action; this method only prints a "not available" notice.

    video_id -- identifier of the video, used as the message prefix
    format   -- the format that could not be found (the parameter name
                shadows the builtin but is kept for keyword callers)
    """
    self.to_screen('%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
    """Tell the user that the download is going to use the RTMP protocol."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    The signature is split on '.', each part's length is converted with
    compat_str, and the results are joined with '.' again.
    """
    part_lengths = [compat_str(len(piece)) for piece in example_sig.split('.')]
    return '.'.join(part_lengths)
669 def _extract_signature_function(self
, video_id
, player_url
, example_sig
):
671 r
'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
674 raise ExtractorError('Cannot identify player %r' % player_url
)
675 player_type
= id_m
.group('ext')
676 player_id
= id_m
.group('id')
678 # Read from filesystem cache
679 func_id
= '%s_%s_%s' % (
680 player_type
, player_id
, self
._signature
_cache
_id
(example_sig
))
681 assert os
.path
.basename(func_id
) == func_id
683 cache_spec
= self
._downloader
.cache
.load('youtube-sigfuncs', func_id
)
684 if cache_spec
is not None:
685 return lambda s
: ''.join(s
[i
] for i
in cache_spec
)
688 'Downloading player %s' % player_url
689 if self
._downloader
.params
.get('verbose') else
690 'Downloading %s player %s' % (player_type
, player_id
)
692 if player_type
== 'js':
693 code
= self
._download
_webpage
(
694 player_url
, video_id
,
696 errnote
='Download of %s failed' % player_url
)
697 res
= self
._parse
_sig
_js
(code
)
698 elif player_type
== 'swf':
699 urlh
= self
._request
_webpage
(
700 player_url
, video_id
,
702 errnote
='Download of %s failed' % player_url
)
704 res
= self
._parse
_sig
_swf
(code
)
706 assert False, 'Invalid player type %r' % player_type
708 test_string
= ''.join(map(compat_chr
, range(len(example_sig
))))
709 cache_res
= res(test_string
)
710 cache_spec
= [ord(c
) for c
in cache_res
]
712 self
._downloader
.cache
.store('youtube-sigfuncs', func_id
, cache_spec
)
715 def _print_sig_code(self
, func
, example_sig
):
716 def gen_sig_code(idxs
):
717 def _genslice(start
, end
, step
):
718 starts
= '' if start
== 0 else str(start
)
719 ends
= (':%d' % (end
+ step
)) if end
+ step
>= 0 else ':'
720 steps
= '' if step
== 1 else (':%d' % step
)
721 return 's[%s%s%s]' % (starts
, ends
, steps
)
724 # Squelch pyflakes warnings - start will be set when step is set
725 start
= '(Never used)'
726 for i
, prev
in zip(idxs
[1:], idxs
[:-1]):
730 yield _genslice(start
, prev
, step
)
733 if i
- prev
in [-1, 1]:
742 yield _genslice(start
, i
, step
)
744 test_string
= ''.join(map(compat_chr
, range(len(example_sig
))))
745 cache_res
= func(test_string
)
746 cache_spec
= [ord(c
) for c
in cache_res
]
747 expr_code
= ' + '.join(gen_sig_code(cache_spec
))
748 signature_id_tuple
= '(%s)' % (
749 ', '.join(compat_str(len(p
)) for p
in example_sig
.split('.')))
750 code
= ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
751 ' return %s\n') % (signature_id_tuple
, expr_code
)
752 self
.to_screen('Extracted signature function:\n' + code
)
def _parse_sig_js(self, jscode):
    """Return a callable that runs the JS player's signature function.

    The function name is located in *jscode* with a regular expression,
    extracted through JSInterpreter, and wrapped so that the returned
    callable takes a single signature string.
    """
    signature_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(signature_func_name)

    def apply_to_signature(s):
        # The extracted function expects its arguments as a list.
        return decipher([s])

    return apply_to_signature
def _parse_sig_swf(self, file_contents):
    """Return a callable that runs the SWF player's 'decipher' function.

    The 'SignatureDecipher' class is pulled out of *file_contents* with
    SWFInterpreter, and its 'decipher' function is wrapped so that the
    returned callable takes a single signature string.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def apply_to_signature(s):
        # The extracted function expects its arguments as a list.
        return decipher([s])

    return apply_to_signature
770 def _decrypt_signature(self
, s
, video_id
, player_url
, age_gate
=False):
771 """Turn the encrypted s field into a working signature"""
773 if player_url
is None:
774 raise ExtractorError('Cannot decrypt signature without player_url')
776 if player_url
.startswith('//'):
777 player_url
= 'https:' + player_url
779 player_id
= (player_url
, self
._signature
_cache
_id
(s
))
780 if player_id
not in self
._player
_cache
:
781 func
= self
._extract
_signature
_function
(
782 video_id
, player_url
, s
784 self
._player
_cache
[player_id
] = func
785 func
= self
._player
_cache
[player_id
]
786 if self
._downloader
.params
.get('youtube_print_sig_code'):
787 self
._print
_sig
_code
(func
, s
)
789 except Exception as e
:
790 tb
= traceback
.format_exc()
791 raise ExtractorError(
792 'Signature extraction failed: ' + tb
, cause
=e
)
794 def _get_subtitles(self
, video_id
, webpage
):
796 subs_doc
= self
._download
_xml
(
797 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
,
798 video_id
, note
=False)
799 except ExtractorError
as err
:
800 self
._downloader
.report_warning('unable to download video subtitles: %s' % compat_str(err
))
804 for track
in subs_doc
.findall('track'):
805 lang
= track
.attrib
['lang_code']
806 if lang
in sub_lang_list
:
809 for ext
in ['sbv', 'vtt', 'srt']:
810 params
= compat_urllib_parse
.urlencode({
814 'name': track
.attrib
['name'].encode('utf-8'),
817 'url': 'https://www.youtube.com/api/timedtext?' + params
,
820 sub_lang_list
[lang
] = sub_formats
821 if not sub_lang_list
:
822 self
._downloader
.report_warning('video doesn\'t have subtitles')
826 def _get_automatic_captions(self
, video_id
, webpage
):
827 """We need the webpage for getting the captions url, pass it as an
828 argument to speed up the process."""
829 self
.to_screen('%s: Looking for automatic captions' % video_id
)
830 mobj
= re
.search(r
';ytplayer.config = ({.*?});', webpage
)
831 err_msg
= 'Couldn\'t find automatic captions for %s' % video_id
833 self
._downloader
.report_warning(err_msg
)
835 player_config
= json
.loads(mobj
.group(1))
837 args
= player_config
['args']
838 caption_url
= args
['ttsurl']
839 timestamp
= args
['timestamp']
840 # We get the available subtitles
841 list_params
= compat_urllib_parse
.urlencode({
846 list_url
= caption_url
+ '&' + list_params
847 caption_list
= self
._download
_xml
(list_url
, video_id
)
848 original_lang_node
= caption_list
.find('track')
849 if original_lang_node
is None:
850 self
._downloader
.report_warning('Video doesn\'t have automatic captions')
852 original_lang
= original_lang_node
.attrib
['lang_code']
853 caption_kind
= original_lang_node
.attrib
.get('kind', '')
856 for lang_node
in caption_list
.findall('target'):
857 sub_lang
= lang_node
.attrib
['lang_code']
859 for ext
in ['sbv', 'vtt', 'srt']:
860 params
= compat_urllib_parse
.urlencode({
861 'lang': original_lang
,
865 'kind': caption_kind
,
868 'url': caption_url
+ '&' + params
,
871 sub_lang_list
[sub_lang
] = sub_formats
873 # An extractor error can be raised by the download process if there are
874 # no automatic captions but there are subtitles
875 except (KeyError, ExtractorError
):
876 self
._downloader
.report_warning(err_msg
)
880 def extract_id(cls
, url
):
881 mobj
= re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
)
883 raise ExtractorError('Invalid URL: %s' % url
)
884 video_id
= mobj
.group(2)
887 def _extract_from_m3u8(self
, manifest_url
, video_id
):
890 def _get_urls(_manifest
):
891 lines
= _manifest
.split('\n')
892 urls
= filter(lambda l
: l
and not l
.startswith('#'),
895 manifest
= self
._download
_webpage
(manifest_url
, video_id
, 'Downloading formats manifest')
896 formats_urls
= _get_urls(manifest
)
897 for format_url
in formats_urls
:
898 itag
= self
._search
_regex
(r
'itag/(\d+?)/', format_url
, 'itag')
899 url_map
[itag
] = format_url
902 def _extract_annotations(self
, video_id
):
903 url
= 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
904 return self
._download
_webpage
(url
, video_id
, note
='Searching for annotations.', errnote
='Unable to download video annotations.')
906 def _parse_dash_manifest(
907 self
, video_id
, dash_manifest_url
, player_url
, age_gate
, fatal
=True):
908 def decrypt_sig(mobj
):
910 dec_s
= self
._decrypt
_signature
(s
, video_id
, player_url
, age_gate
)
911 return '/signature/%s' % dec_s
912 dash_manifest_url
= re
.sub(r
'/s/([a-fA-F0-9\.]+)', decrypt_sig
, dash_manifest_url
)
913 dash_doc
= self
._download
_xml
(
914 dash_manifest_url
, video_id
,
915 note
='Downloading DASH manifest',
916 errnote
='Could not download DASH manifest',
919 if dash_doc
is False:
923 for a
in dash_doc
.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
924 mime_type
= a
.attrib
.get('mimeType')
925 for r
in a
.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
926 url_el
= r
.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
929 if mime_type
== 'text/vtt':
930 # TODO implement WebVTT downloading
932 elif mime_type
.startswith('audio/') or mime_type
.startswith('video/'):
933 segment_list
= r
.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
934 format_id
= r
.attrib
['id']
935 video_url
= url_el
.text
936 filesize
= int_or_none(url_el
.attrib
.get('{http://youtube.com/yt/2012/10/10}contentLength'))
938 'format_id': format_id
,
940 'width': int_or_none(r
.attrib
.get('width')),
941 'height': int_or_none(r
.attrib
.get('height')),
942 'tbr': int_or_none(r
.attrib
.get('bandwidth'), 1000),
943 'asr': int_or_none(r
.attrib
.get('audioSamplingRate')),
944 'filesize': filesize
,
945 'fps': int_or_none(r
.attrib
.get('frameRate')),
947 if segment_list
is not None:
949 'initialization_url': segment_list
.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib
['sourceURL'],
950 'segment_urls': [segment
.attrib
.get('media') for segment
in segment_list
.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
951 'protocol': 'http_dash_segments',
954 existing_format
= next(
956 if fo
['format_id'] == format_id
)
957 except StopIteration:
958 full_info
= self
._formats
.get(format_id
, {}).copy()
960 codecs
= r
.attrib
.get('codecs')
962 if full_info
.get('acodec') == 'none' and 'vcodec' not in full_info
:
963 full_info
['vcodec'] = codecs
964 elif full_info
.get('vcodec') == 'none' and 'acodec' not in full_info
:
965 full_info
['acodec'] = codecs
966 formats
.append(full_info
)
968 existing_format
.update(f
)
970 self
.report_warning('Unknown MIME type %s in DASH manifest' % mime_type
)
973 def _real_extract(self
, url
):
974 url
, smuggled_data
= unsmuggle_url(url
, {})
977 'http' if self
._downloader
.params
.get('prefer_insecure', False)
982 parsed_url
= compat_urllib_parse_urlparse(url
)
983 for component
in [parsed_url
.fragment
, parsed_url
.query
]:
984 query
= compat_parse_qs(component
)
985 if start_time
is None and 't' in query
:
986 start_time
= parse_duration(query
['t'][0])
987 if start_time
is None and 'start' in query
:
988 start_time
= parse_duration(query
['start'][0])
989 if end_time
is None and 'end' in query
:
990 end_time
= parse_duration(query
['end'][0])
992 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
993 mobj
= re
.search(self
._NEXT
_URL
_RE
, url
)
995 url
= proto
+ '://www.youtube.com/' + compat_urllib_parse_unquote(mobj
.group(1)).lstrip('/')
996 video_id
= self
.extract_id(url
)
999 url
= proto
+ '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1000 video_webpage
= self
._download
_webpage
(url
, video_id
)
1002 # Attempt to extract SWF player URL
1003 mobj
= re
.search(r
'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
)
1004 if mobj
is not None:
1005 player_url
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1))
1011 def add_dash_mpd(video_info
):
1012 dash_mpd
= video_info
.get('dashmpd')
1013 if dash_mpd
and dash_mpd
[0] not in dash_mpds
:
1014 dash_mpds
.append(dash_mpd
[0])
1017 embed_webpage
= None
1019 if re
.search(r
'player-age-gate-content">', video_webpage
) is not None:
1021 # We simulate the access to the video from www.youtube.com/v/{video_id}
1022 # this can be viewed without login into Youtube
1023 url
= proto
+ '://www.youtube.com/embed/%s' % video_id
1024 embed_webpage
= self
._download
_webpage
(url
, video_id
, 'Downloading embed webpage')
1025 data
= compat_urllib_parse
.urlencode({
1026 'video_id': video_id
,
1027 'eurl': 'https://youtube.googleapis.com/v/' + video_id
,
1028 'sts': self
._search
_regex
(
1029 r
'"sts"\s*:\s*(\d+)', embed_webpage
, 'sts', default
=''),
1031 video_info_url
= proto
+ '://www.youtube.com/get_video_info?' + data
1032 video_info_webpage
= self
._download
_webpage
(
1033 video_info_url
, video_id
,
1034 note
='Refetching age-gated info webpage',
1035 errnote
='unable to download video info webpage')
1036 video_info
= compat_parse_qs(video_info_webpage
)
1037 add_dash_mpd(video_info
)
1041 # Try looking directly into the video webpage
1042 mobj
= re
.search(r
';ytplayer\.config\s*=\s*({.*?});', video_webpage
)
1044 json_code
= uppercase_escape(mobj
.group(1))
1045 ytplayer_config
= json
.loads(json_code
)
1046 args
= ytplayer_config
['args']
1047 if args
.get('url_encoded_fmt_stream_map'):
1048 # Convert to the same format returned by compat_parse_qs
1049 video_info
= dict((k
, [v
]) for k
, v
in args
.items())
1050 add_dash_mpd(video_info
)
1051 if args
.get('livestream') == '1' or args
.get('live_playback') == 1:
1053 if not video_info
or self
._downloader
.params
.get('youtube_include_dash_manifest', True):
1054 # We also try looking in get_video_info since it may contain different dashmpd
1055 # URL that points to a DASH manifest with possibly different itag set (some itags
1056 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1057 # manifest pointed by get_video_info's dashmpd).
1058 # The general idea is to take a union of itags of both DASH manifests (for example
1059 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1060 self
.report_video_info_webpage_download(video_id
)
1061 for el_type
in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1063 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1064 % (proto
, video_id
, el_type
))
1065 video_info_webpage
= self
._download
_webpage
(
1067 video_id
, note
=False,
1068 errnote
='unable to download video info webpage')
1069 get_video_info
= compat_parse_qs(video_info_webpage
)
1070 if get_video_info
.get('use_cipher_signature') != ['True']:
1071 add_dash_mpd(get_video_info
)
1073 video_info
= get_video_info
1074 if 'token' in get_video_info
:
1076 if 'token' not in video_info
:
1077 if 'reason' in video_info
:
1078 if 'The uploader has not made this video available in your country.' in video_info
['reason']:
1079 regions_allowed
= self
._html
_search
_meta
('regionsAllowed', video_webpage
, default
=None)
1081 raise ExtractorError('YouTube said: This video is available in %s only' % (
1082 ', '.join(map(ISO3166Utils
.short2full
, regions_allowed
.split(',')))),
1084 raise ExtractorError(
1085 'YouTube said: %s' % video_info
['reason'][0],
1086 expected
=True, video_id
=video_id
)
1088 raise ExtractorError(
1089 '"token" parameter not in video info for unknown reason',
1093 if 'title' in video_info
:
1094 video_title
= video_info
['title'][0]
1096 self
._downloader
.report_warning('Unable to extract video title')
1100 video_description
= get_element_by_id("eow-description", video_webpage
)
1101 if video_description
:
1102 video_description
= re
.sub(r
'''(?x)
1104 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1106 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1107 class="yt-uix-redirect-link"\s*>
1110 ''', r
'\1', video_description
)
1111 video_description
= clean_html(video_description
)
1113 fd_mobj
= re
.search(r
'<meta name="description" content="([^"]+)"', video_webpage
)
1115 video_description
= unescapeHTML(fd_mobj
.group(1))
1117 video_description
= ''
1119 if 'multifeed_metadata_list' in video_info
and not smuggled_data
.get('force_singlefeed', False):
1120 if not self
._downloader
.params
.get('noplaylist'):
1123 multifeed_metadata_list
= compat_urllib_parse_unquote_plus(video_info
['multifeed_metadata_list'][0])
1124 for feed
in multifeed_metadata_list
.split(','):
1125 feed_data
= compat_parse_qs(feed
)
1127 '_type': 'url_transparent',
1128 'ie_key': 'Youtube',
1130 '%s://www.youtube.com/watch?v=%s' % (proto
, feed_data
['id'][0]),
1131 {'force_singlefeed': True}
),
1132 'title': '%s (%s)' % (video_title
, feed_data
['title'][0]),
1134 feed_ids
.append(feed_data
['id'][0])
1136 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1137 % (', '.join(feed_ids
), video_id
))
1138 return self
.playlist_result(entries
, video_id
, video_title
, video_description
)
1139 self
.to_screen('Downloading just video %s because of --no-playlist' % video_id
)
1141 if 'view_count' in video_info
:
1142 view_count
= int(video_info
['view_count'][0])
1146 # Check for "rental" videos
1147 if 'ypc_video_rental_bar_text' in video_info
and 'author' not in video_info
:
1148 raise ExtractorError('"rental" videos not supported')
1150 # Start extracting information
1151 self
.report_information_extraction(video_id
)
1154 if 'author' not in video_info
:
1155 raise ExtractorError('Unable to extract uploader name')
1156 video_uploader
= compat_urllib_parse_unquote_plus(video_info
['author'][0])
1159 video_uploader_id
= None
1160 mobj
= re
.search(r
'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage
)
1161 if mobj
is not None:
1162 video_uploader_id
= mobj
.group(1)
1164 self
._downloader
.report_warning('unable to extract uploader nickname')
1167 # We try first to get a high quality image:
1168 m_thumb
= re
.search(r
'<span itemprop="thumbnail".*?href="(.*?)">',
1169 video_webpage
, re
.DOTALL
)
1170 if m_thumb
is not None:
1171 video_thumbnail
= m_thumb
.group(1)
1172 elif 'thumbnail_url' not in video_info
:
1173 self
._downloader
.report_warning('unable to extract video thumbnail')
1174 video_thumbnail
= None
1175 else: # don't panic if we can't find it
1176 video_thumbnail
= compat_urllib_parse_unquote_plus(video_info
['thumbnail_url'][0])
1179 upload_date
= self
._html
_search
_meta
(
1180 'datePublished', video_webpage
, 'upload date', default
=None)
1182 upload_date
= self
._search
_regex
(
1183 [r
'(?s)id="eow-date.*?>(.*?)</span>',
1184 r
'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1185 video_webpage
, 'upload date', default
=None)
1187 upload_date
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split())
1188 upload_date
= unified_strdate(upload_date
)
1190 m_cat_container
= self
._search
_regex
(
1191 r
'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1192 video_webpage
, 'categories', default
=None)
1194 category
= self
._html
_search
_regex
(
1195 r
'(?s)<a[^<]+>(.*?)</a>', m_cat_container
, 'category',
1197 video_categories
= None if category
is None else [category
]
1199 video_categories
= None
1202 unescapeHTML(m
.group('content'))
1203 for m
in re
.finditer(self
._meta
_regex
('og:video:tag'), video_webpage
)]
1205 def _extract_count(count_name
):
1206 return str_to_int(self
._search
_regex
(
1207 r
'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1208 % re
.escape(count_name
),
1209 video_webpage
, count_name
, default
=None))
1211 like_count
= _extract_count('like')
1212 dislike_count
= _extract_count('dislike')
1215 video_subtitles
= self
.extract_subtitles(video_id
, video_webpage
)
1216 automatic_captions
= self
.extract_automatic_captions(video_id
, video_webpage
)
1218 if 'length_seconds' not in video_info
:
1219 self
._downloader
.report_warning('unable to extract video duration')
1220 video_duration
= None
1222 video_duration
= int(compat_urllib_parse_unquote_plus(video_info
['length_seconds'][0]))
1225 video_annotations
= None
1226 if self
._downloader
.params
.get('writeannotations', False):
1227 video_annotations
= self
._extract
_annotations
(video_id
)
1229 def _map_to_format_list(urlmap
):
1231 for itag
, video_real_url
in urlmap
.items():
1234 'url': video_real_url
,
1235 'player_url': player_url
,
1237 if itag
in self
._formats
:
1238 dct
.update(self
._formats
[itag
])
1242 if 'conn' in video_info
and video_info
['conn'][0].startswith('rtmp'):
1243 self
.report_rtmp_download()
1245 'format_id': '_rtmp',
1247 'url': video_info
['conn'][0],
1248 'player_url': player_url
,
1250 elif len(video_info
.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info
.get('adaptive_fmts', [''])[0]) >= 1:
1251 encoded_url_map
= video_info
.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info
.get('adaptive_fmts', [''])[0]
1252 if 'rtmpe%3Dyes' in encoded_url_map
:
1253 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected
=True)
1255 for url_data_str
in encoded_url_map
.split(','):
1256 url_data
= compat_parse_qs(url_data_str
)
1257 if 'itag' not in url_data
or 'url' not in url_data
:
1259 format_id
= url_data
['itag'][0]
1260 url
= url_data
['url'][0]
1262 if 'sig' in url_data
:
1263 url
+= '&signature=' + url_data
['sig'][0]
1264 elif 's' in url_data
:
1265 encrypted_sig
= url_data
['s'][0]
1266 ASSETS_RE
= r
'"assets":.+?"js":\s*("[^"]+")'
1268 jsplayer_url_json
= self
._search
_regex
(
1270 embed_webpage
if age_gate
else video_webpage
,
1271 'JS player URL (1)', default
=None)
1272 if not jsplayer_url_json
and not age_gate
:
1273 # We need the embed website after all
1274 if embed_webpage
is None:
1275 embed_url
= proto
+ '://www.youtube.com/embed/%s' % video_id
1276 embed_webpage
= self
._download
_webpage
(
1277 embed_url
, video_id
, 'Downloading embed webpage')
1278 jsplayer_url_json
= self
._search
_regex
(
1279 ASSETS_RE
, embed_webpage
, 'JS player URL')
1281 player_url
= json
.loads(jsplayer_url_json
)
1282 if player_url
is None:
1283 player_url_json
= self
._search
_regex
(
1284 r
'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1285 video_webpage
, 'age gate player URL')
1286 player_url
= json
.loads(player_url_json
)
1288 if self
._downloader
.params
.get('verbose'):
1289 if player_url
is None:
1290 player_version
= 'unknown'
1291 player_desc
= 'unknown'
1293 if player_url
.endswith('swf'):
1294 player_version
= self
._search
_regex
(
1295 r
'-(.+?)(?:/watch_as3)?\.swf$', player_url
,
1296 'flash player', fatal
=False)
1297 player_desc
= 'flash player %s' % player_version
1299 player_version
= self
._search
_regex
(
1300 r
'html5player-([^/]+?)(?:/html5player)?\.js',
1302 'html5 player', fatal
=False)
1303 player_desc
= 'html5 player %s' % player_version
1305 parts_sizes
= self
._signature
_cache
_id
(encrypted_sig
)
1306 self
.to_screen('{%s} signature length %s, %s' %
1307 (format_id
, parts_sizes
, player_desc
))
1309 signature
= self
._decrypt
_signature
(
1310 encrypted_sig
, video_id
, player_url
, age_gate
)
1311 url
+= '&signature=' + signature
1312 if 'ratebypass' not in url
:
1313 url
+= '&ratebypass=yes'
1314 url_map
[format_id
] = url
1315 formats
= _map_to_format_list(url_map
)
1316 elif video_info
.get('hlsvp'):
1317 manifest_url
= video_info
['hlsvp'][0]
1318 url_map
= self
._extract
_from
_m
3u8(manifest_url
, video_id
)
1319 formats
= _map_to_format_list(url_map
)
1321 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1323 # Look for the DASH manifest
1324 if self
._downloader
.params
.get('youtube_include_dash_manifest', True):
1325 dash_mpd_fatal
= True
1326 for dash_manifest_url
in dash_mpds
:
1329 for df
in self
._parse
_dash
_manifest
(
1330 video_id
, dash_manifest_url
, player_url
, age_gate
, dash_mpd_fatal
):
1331 # Do not overwrite DASH format found in some previous DASH manifest
1332 if df
['format_id'] not in dash_formats
:
1333 dash_formats
[df
['format_id']] = df
1334 # Additional DASH manifests may end up in HTTP Error 403 therefore
1335 # allow them to fail without bug report message if we already have
1336 # some DASH manifest succeeded. This is temporary workaround to reduce
1337 # burst of bug reports until we figure out the reason and whether it
1338 # can be fixed at all.
1339 dash_mpd_fatal
= False
1340 except (ExtractorError
, KeyError) as e
:
1341 self
.report_warning(
1342 'Skipping DASH manifest: %r' % e
, video_id
)
1344 # Remove the formats we found through non-DASH, they
1345 # contain less info and it can be wrong, because we use
1346 # fixed values (for example the resolution). See
1347 # https://github.com/rg3/youtube-dl/issues/5774 for an
1349 formats
= [f
for f
in formats
if f
['format_id'] not in dash_formats
.keys()]
1350 formats
.extend(dash_formats
.values())
1352 # Check for malformed aspect ratio
1353 stretched_m
= re
.search(
1354 r
'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1357 ratio
= float(stretched_m
.group('w')) / float(stretched_m
.group('h'))
1359 if f
.get('vcodec') != 'none':
1360 f
['stretched_ratio'] = ratio
1362 self
._sort
_formats
(formats
)
1366 'uploader': video_uploader
,
1367 'uploader_id': video_uploader_id
,
1368 'upload_date': upload_date
,
1369 'title': video_title
,
1370 'thumbnail': video_thumbnail
,
1371 'description': video_description
,
1372 'categories': video_categories
,
1374 'subtitles': video_subtitles
,
1375 'automatic_captions': automatic_captions
,
1376 'duration': video_duration
,
1377 'age_limit': 18 if age_gate
else 0,
1378 'annotations': video_annotations
,
1379 'webpage_url': proto
+ '://www.youtube.com/watch?v=%s' % video_id
,
1380 'view_count': view_count
,
1381 'like_count': like_count
,
1382 'dislike_count': dislike_count
,
1383 'average_rating': float_or_none(video_info
.get('avg_rating', [None])[0]),
1386 'start_time': start_time
,
1387 'end_time': end_time
,
1391 class YoutubePlaylistIE(YoutubeBaseInfoExtractor
):
1392 IE_DESC
= 'YouTube.com playlists'
1393 _VALID_URL
= r
"""(?x)(?:
1398 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1399 \? (?:.*?&)*? (?:p|a|list)=
1403 (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1404 # Top tracks, they can also include dots
1409 ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1411 _TEMPLATE_URL
= 'https://www.youtube.com/playlist?list=%s'
1412 _VIDEO_RE
= r
'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
1413 IE_NAME
= 'youtube:playlist'
1415 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1417 'title': 'ytdl test PL',
1418 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1420 'playlist_count': 3,
1422 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1424 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1425 'title': 'YDL_Empty_List',
1427 'playlist_count': 0,
1429 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1430 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1432 'title': '29C3: Not my department',
1433 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1435 'playlist_count': 95,
1437 'note': 'issue #673',
1438 'url': 'PLBB231211A4F62143',
1440 'title': '[OLD]Team Fortress 2 (Class-based LP)',
1441 'id': 'PLBB231211A4F62143',
1443 'playlist_mincount': 26,
1445 'note': 'Large playlist',
1446 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1448 'title': 'Uploads from Cauchemar',
1449 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1451 'playlist_mincount': 799,
1453 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1455 'title': 'YDL_safe_search',
1456 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1458 'playlist_count': 2,
1461 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1462 'playlist_count': 4,
1465 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1468 'note': 'Embedded SWF player',
1469 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1470 'playlist_count': 4,
1473 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1476 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1477 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1479 'title': 'Uploads from Interstellar Movie',
1480 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1482 'playlist_mincout': 21,
1485 def _real_initialize(self
):
1488 def _extract_mix(self
, playlist_id
):
1489 # The mixes are generated from a single video
1490 # the id of the playlist is just 'RD' + video_id
1491 url
= 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id
[-11:], playlist_id
)
1492 webpage
= self
._download
_webpage
(
1493 url
, playlist_id
, 'Downloading Youtube mix')
1494 search_title
= lambda class_name
: get_element_by_attribute('class', class_name
, webpage
)
1496 search_title('playlist-title') or
1497 search_title('title long-title') or
1498 search_title('title'))
1499 title
= clean_html(title_span
)
1500 ids
= orderedSet(re
.findall(
1501 r
'''(?xs)data-video-username=".*?".*?
1502 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re
.escape(playlist_id
),
1504 url_results
= self
._ids
_to
_results
(ids
)
1506 return self
.playlist_result(url_results
, playlist_id
, title
)
1508 def _extract_playlist(self
, playlist_id
):
1509 url
= self
._TEMPLATE
_URL
% playlist_id
1510 page
= self
._download
_webpage
(url
, playlist_id
)
1512 for match
in re
.findall(r
'<div class="yt-alert-message">([^<]+)</div>', page
):
1513 match
= match
.strip()
1514 # Check if the playlist exists or is private
1515 if re
.match(r
'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match
):
1516 raise ExtractorError(
1517 'The playlist doesn\'t exist or is private, use --username or '
1518 '--netrc to access it.',
1520 elif re
.match(r
'[^<]*Invalid parameters[^<]*', match
):
1521 raise ExtractorError(
1522 'Invalid parameters. Maybe URL is incorrect.',
1524 elif re
.match(r
'[^<]*Choose your language[^<]*', match
):
1527 self
.report_warning('Youtube gives an alert message: ' + match
)
1529 # Extract the video ids from the playlist pages
1531 more_widget_html
= content_html
= page
1532 for page_num
in itertools
.count(1):
1533 matches
= re
.finditer(self
._VIDEO
_RE
, content_html
)
1534 # We remove the duplicates and the link with index 0
1535 # (it's not the first video of the playlist)
1536 new_ids
= orderedSet(m
.group('id') for m
in matches
if m
.group('index') != '0')
1537 for vid_id
in new_ids
:
1538 yield self
.url_result(vid_id
, 'Youtube', video_id
=vid_id
)
1540 mobj
= re
.search(r
'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html
)
1544 more
= self
._download
_json
(
1545 'https://youtube.com/%s' % mobj
.group('more'), playlist_id
,
1546 'Downloading page #%s' % page_num
,
1547 transform_source
=uppercase_escape
)
1548 content_html
= more
['content_html']
1549 if not content_html
.strip():
1550 # Some webpages show a "Load more" button but they don't
1553 more_widget_html
= more
['load_more_widget_html']
1555 playlist_title
= self
._html
_search
_regex
(
1556 r
'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
1559 return self
.playlist_result(_entries(), playlist_id
, playlist_title
)
1561 def _real_extract(self
, url
):
1562 # Extract playlist id
1563 mobj
= re
.match(self
._VALID
_URL
, url
)
1565 raise ExtractorError('Invalid URL: %s' % url
)
1566 playlist_id
= mobj
.group(1) or mobj
.group(2)
1568 # Check if it's a video-specific URL
1569 query_dict
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
)
1570 if 'v' in query_dict
:
1571 video_id
= query_dict
['v'][0]
1572 if self
._downloader
.params
.get('noplaylist'):
1573 self
.to_screen('Downloading just video %s because of --no-playlist' % video_id
)
1574 return self
.url_result(video_id
, 'Youtube', video_id
=video_id
)
1576 self
.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id
, video_id
))
1578 if playlist_id
.startswith('RD') or playlist_id
.startswith('UL'):
1579 # Mixes require a custom extraction process
1580 return self
._extract
_mix
(playlist_id
)
1582 return self
._extract
_playlist
(playlist_id
)
1585 class YoutubeChannelIE(InfoExtractor
):
1586 IE_DESC
= 'YouTube.com channels'
1587 _VALID_URL
= r
'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
1588 _TEMPLATE_URL
= 'https://www.youtube.com/channel/%s/videos'
1589 IE_NAME
= 'youtube:channel'
1591 'note': 'paginated channel',
1592 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1593 'playlist_mincount': 91,
1595 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
1600 def extract_videos_from_page(page
):
1603 for mobj
in re
.finditer(r
'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page
):
1604 video_id
= mobj
.group('id')
1605 video_title
= unescapeHTML(mobj
.group('title'))
1607 idx
= ids_in_page
.index(video_id
)
1608 if video_title
and not titles_in_page
[idx
]:
1609 titles_in_page
[idx
] = video_title
1611 ids_in_page
.append(video_id
)
1612 titles_in_page
.append(video_title
)
1613 return zip(ids_in_page
, titles_in_page
)
1615 def _real_extract(self
, url
):
1616 channel_id
= self
._match
_id
(url
)
1618 url
= self
._TEMPLATE
_URL
% channel_id
1620 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1621 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1622 # otherwise fallback on channel by page extraction
1623 channel_page
= self
._download
_webpage
(
1624 url
+ '?view=57', channel_id
,
1625 'Downloading channel page', fatal
=False)
1626 channel_playlist_id
= self
._html
_search
_meta
(
1627 'channelId', channel_page
, 'channel id', default
=None)
1628 if not channel_playlist_id
:
1629 channel_playlist_id
= self
._search
_regex
(
1630 r
'data-channel-external-id="([^"]+)"',
1631 channel_page
, 'channel id', default
=None)
1632 if channel_playlist_id
and channel_playlist_id
.startswith('UC'):
1633 playlist_id
= 'UU' + channel_playlist_id
[2:]
1634 return self
.url_result(
1635 compat_urlparse
.urljoin(url
, '/playlist?list=%s' % playlist_id
), 'YoutubePlaylist')
1637 channel_page
= self
._download
_webpage
(url
, channel_id
, 'Downloading page #1')
1638 autogenerated
= re
.search(r
'''(?x)
1640 channel-header-autogenerated-label|
1641 yt-channel-title-autogenerated
1642 )[^"]*"''', channel_page
) is not None
1645 # The videos are contained in a single page
1646 # the ajax pages can't be used, they are empty
1649 video_id
, 'Youtube', video_id
=video_id
,
1650 video_title
=video_title
)
1651 for video_id
, video_title
in self
.extract_videos_from_page(channel_page
)]
1652 return self
.playlist_result(entries
, channel_id
)
1655 more_widget_html
= content_html
= channel_page
1656 for pagenum
in itertools
.count(1):
1658 for video_id
, video_title
in self
.extract_videos_from_page(content_html
):
1659 yield self
.url_result(
1660 video_id
, 'Youtube', video_id
=video_id
,
1661 video_title
=video_title
)
1664 r
'data-uix-load-more-href="/?(?P<more>[^"]+)"',
1669 more
= self
._download
_json
(
1670 'https://youtube.com/%s' % mobj
.group('more'), channel_id
,
1671 'Downloading page #%s' % (pagenum
+ 1),
1672 transform_source
=uppercase_escape
)
1673 content_html
= more
['content_html']
1674 more_widget_html
= more
['load_more_widget_html']
1676 return self
.playlist_result(_entries(), channel_id
)
1679 class YoutubeUserIE(YoutubeChannelIE
):
1680 IE_DESC
= 'YouTube.com user videos (URL or "ytuser" keyword)'
1681 _VALID_URL
= r
'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
1682 _TEMPLATE_URL
= 'https://www.youtube.com/user/%s/videos'
1683 IE_NAME
= 'youtube:user'
1686 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
1687 'playlist_mincount': 320,
1689 'title': 'TheLinuxFoundation',
1692 'url': 'ytuser:phihag',
1693 'only_matching': True,
1697 def suitable(cls
, url
):
1698 # Don't return True if the url can be extracted with other youtube
1699 # extractor, the regex would is too permissive and it would match.
1700 other_ies
= iter(klass
for (name
, klass
) in globals().items() if name
.endswith('IE') and klass
is not cls
)
1701 if any(ie
.suitable(url
) for ie
in other_ies
):
1704 return super(YoutubeUserIE
, cls
).suitable(url
)
1707 class YoutubeSearchIE(SearchInfoExtractor
, YoutubePlaylistIE
):
1708 IE_DESC
= 'YouTube.com searches'
1709 # there doesn't appear to be a real limit, for example if you search for
1710 # 'python' you get more than 8.000.000 results
1711 _MAX_RESULTS
= float('inf')
1712 IE_NAME
= 'youtube:search'
1713 _SEARCH_KEY
= 'ytsearch'
1714 _EXTRA_QUERY_ARGS
= {}
1717 def _get_n_results(self
, query
, n
):
1718 """Get a specified number of results for a query"""
1723 for pagenum
in itertools
.count(1):
1725 'search_query': query
.encode('utf-8'),
1729 url_query
.update(self
._EXTRA
_QUERY
_ARGS
)
1730 result_url
= 'https://www.youtube.com/results?' + compat_urllib_parse
.urlencode(url_query
)
1731 data
= self
._download
_json
(
1732 result_url
, video_id
='query "%s"' % query
,
1733 note
='Downloading page %s' % pagenum
,
1734 errnote
='Unable to download API page')
1735 html_content
= data
[1]['body']['content']
1737 if 'class="search-message' in html_content
:
1738 raise ExtractorError(
1739 '[youtube] No video results', expected
=True)
1741 new_videos
= self
._ids
_to
_results
(orderedSet(re
.findall(
1742 r
'href="/watch\?v=(.{11})', html_content
)))
1743 videos
+= new_videos
1744 if not new_videos
or len(videos
) > limit
:
1749 return self
.playlist_result(videos
, query
)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that lists the newest videos first.

    Everything except the search key, the names and the extra query
    arguments is inherited from YoutubeSearchIE.
    """
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Extra parameters that the parent class merges into the results query.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
1759 class YoutubeSearchURLIE(InfoExtractor
):
1760 IE_DESC
= 'YouTube.com search URLs'
1761 IE_NAME
= 'youtube:search_url'
1762 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
1764 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
1765 'playlist_mincount': 5,
1767 'title': 'youtube-dl test video',
1771 def _real_extract(self
, url
):
1772 mobj
= re
.match(self
._VALID
_URL
, url
)
1773 query
= compat_urllib_parse_unquote_plus(mobj
.group('query'))
1775 webpage
= self
._download
_webpage
(url
, query
)
1776 result_code
= self
._search
_regex
(
1777 r
'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage
, 'result HTML')
1779 part_codes
= re
.findall(
1780 r
'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code
)
1782 for part_code
in part_codes
:
1783 part_title
= self
._html
_search
_regex
(
1784 [r
'(?s)title="([^"]+)"', r
'>([^<]+)</a>'], part_code
, 'item title', fatal
=False)
1785 part_url_snippet
= self
._html
_search
_regex
(
1786 r
'(?s)href="([^"]+)"', part_code
, 'item URL')
1787 part_url
= compat_urlparse
.urljoin(
1788 'https://www.youtube.com/', part_url_snippet
)
1792 'title': part_title
,
1796 '_type': 'playlist',
1802 class YoutubeShowIE(InfoExtractor
):
1803 IE_DESC
= 'YouTube.com (multi-season) shows'
1804 _VALID_URL
= r
'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
1805 IE_NAME
= 'youtube:show'
1807 'url': 'http://www.youtube.com/show/airdisasters',
1808 'playlist_mincount': 3,
1810 'id': 'airdisasters',
1811 'title': 'Air Disasters',
1815 def _real_extract(self
, url
):
1816 mobj
= re
.match(self
._VALID
_URL
, url
)
1817 playlist_id
= mobj
.group('id')
1818 webpage
= self
._download
_webpage
(
1819 url
, playlist_id
, 'Downloading show webpage')
1820 # There's one playlist for each season of the show
1821 m_seasons
= list(re
.finditer(r
'href="(/playlist\?list=.*?)"', webpage
))
1822 self
.to_screen('%s: Found %s seasons' % (playlist_id
, len(m_seasons
)))
1825 'https://www.youtube.com' + season
.group(1), 'YoutubePlaylist')
1826 for season
in m_seasons
1828 title
= self
._og
_search
_title
(webpage
, fatal
=False)
1831 '_type': 'playlist',
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so extraction without credentials is an error.
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The feed page is downloaded first, then the "load more" link is
        followed page by page until either no such link is left or a page
        contributes no video id that has not been seen already.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(
                r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 filter() returns a lazy
            # iterator that is always truthy, so the emptiness check below
            # would never stop the loop.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'),
                self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list, handled as the playlist 'WL'."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # Deliberately empty so the test cases of YoutubePlaylistIE are not inherited.
    _TESTS = []

    def _real_extract(self, url):
        """Extract the fixed 'WL' playlist; *url* itself is not inspected."""
        watch_later_id = 'WL'
        return self._extract_playlist(watch_later_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a url_result that hands the favourites playlist id over
        to the 'YoutubePlaylist' extractor.

        The url argument is not inspected; the favourites page is always
        downloaded from its fixed address.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        # The playlist id is taken from the first "list=..." occurrence in
        # the page, up to the next double quote or ampersand.
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Accepts the feed URL or the ":ytrec" / ":ytrecommended" shortcut.
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed path component consumed by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'recommended'
    # Title given to the resulting playlist by the base class.
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    # The shortcut is spelled with its leading colon, exactly as _VALID_URL
    # requires it and as the sibling feed extractors describe theirs.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts the feed URL or the ":ytsubs" / ":ytsubscriptions" shortcut.
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed path component consumed by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'subscriptions'
    # Title given to the resulting playlist by the base class.
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string, like every other _VALID_URL here: in a normal literal
    # '\.' is an invalid escape sequence that newer Python versions warn
    # about. The resulting pattern text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # Feed path component consumed by the YoutubeFeedsInfoExtractor base class.
    _FEED_NAME = 'history'
    # Title given to the resulting playlist by the base class.
    _PLAYLIST_TITLE = 'Youtube History'
1931 class YoutubeTruncatedURLIE(InfoExtractor
):
1932 IE_NAME
= 'youtube:truncated_url'
1933 IE_DESC
= False # Do not list
1934 _VALID_URL
= r
'''(?x)
1936 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
1939 annotation_id=annotation_[^&]+|
1944 attribution_link\?a=[^&]+
1950 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
1951 'only_matching': True,
1953 'url': 'http://www.youtube.com/watch?',
1954 'only_matching': True,
1956 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
1957 'only_matching': True,
1959 'url': 'https://www.youtube.com/watch?feature=foo',
1960 'only_matching': True,
1962 'url': 'https://www.youtube.com/watch?hl=en-GB',
1963 'only_matching': True,
1966 def _real_extract(self
, url
):
1967 raise ExtractorError(
1968 'Did you forget to quote the URL? Remember that & is a meta '
1969 'character in most shells, so you want to put the URL in quotes, '
1971 '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
1972 ' or simply youtube-dl BaW_jenozKc .',
1976 class YoutubeTruncatedIDIE(InfoExtractor
):
1977 IE_NAME
= 'youtube:truncated_id'
1978 IE_DESC
= False # Do not list
1979 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
1982 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
1983 'only_matching': True,
1986 def _real_extract(self
, url
):
1987 video_id
= self
._match
_id
(url
)
1988 raise ExtractorError(
1989 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id
, url
),