]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Set 'is_live'
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
c5e8d7af 9import re
42939b61 10import time
e0df6211 11import traceback
c5e8d7af 12
b05654f0 13from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 14from ..jsinterp import JSInterpreter
54256267 15from ..swfinterp import SWFInterpreter
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
c5e8d7af 18 compat_parse_qs,
c5e8d7af 19 compat_urllib_parse,
7fd002c0
S
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
c5e8d7af 22 compat_urllib_request,
7c61bd36 23 compat_urlparse,
c5e8d7af 24 compat_str,
4bb4a188
PH
25)
26from ..utils import (
c5e8d7af 27 clean_html,
c5e8d7af 28 ExtractorError,
2d30521a 29 float_or_none,
4bb4a188
PH
30 get_element_by_attribute,
31 get_element_by_id,
dd27fd17 32 int_or_none,
4bb4a188 33 orderedSet,
c93d53f5 34 str_to_int,
c5e8d7af
PH
35 unescapeHTML,
36 unified_strdate,
81c2f20b 37 uppercase_escape,
af214c3a 38 ISO3166Utils,
c5e8d7af
PH
39)
40
5f6a1245 41
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the 'PREF' cookie for .youtube.com to 'f1=50000000&hl=en'."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result entry per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Log in
        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,

            'PersistentCookie': 'yes',
            '_utf8': '霱',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'dnConn': '',
            'pstMsg': '0',
            'rmShown': '1',
            'secTok': '',
            'signIn': 'Sign in',
            'timeStmp': '',
            'service': 'youtube',
            'uilel': '3',
            'hl': 'en_US',
        }

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
            tfa_code = self._get_tfa_info()

            if tfa_code is None:
                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Unlike the first login form, secTok and timeStmp are both required for the TFA form

            match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get secTok - did the page structure change?')
                # Without the token the TFA form cannot be submitted; bail out
                # instead of dereferencing the missing match
                return False
            secTok = match.group(1)
            match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
                return False
            timeStmp = match.group(1)

            tfa_form_strs = {
                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                'smsToken': '',
                'smsUserPin': tfa_code,
                'smsVerifyPin': 'Verify',

                'PersistentCookie': 'yes',
                'checkConnection': '',
                'checkedDomains': 'youtube',
                'pstMsg': '1',
                'secTok': secTok,
                'timeStmp': timeStmp,
                'service': 'youtube',
                'hl': 'en_US',
            }
            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
            tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')

            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # The response page tells us why the second factor was not accepted
            if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # Being served the login form again means the credentials were rejected
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; do nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 193
8377574c 194
360e1ca5 195class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 196 IE_DESC = 'YouTube.com'
cb7dfeea 197 _VALID_URL = r"""(?x)^
c5e8d7af 198 (
edb53e2d 199 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 200 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 201 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 202 (?:www\.)?pwnyoutube\.com/|
f7000f3a 203 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
204 tube\.majestyc\.net/|
205 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
206 (?:.*?\#/)? # handle anchor (#/) redirect urls
207 (?: # the various things that can precede the ID:
ac7553d0 208 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 209 |(?: # or the v= param in all its forms
f7000f3a 210 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af
PH
211 (?:\?|\#!?) # the params delimiter ? or # or #!
212 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
213 v=
214 )
f4b05232
JMF
215 ))
216 |youtu\.be/ # just youtu.be/xxxx
edb53e2d 217 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 218 )
c5e8d7af 219 )? # all until now is optional -> you can pass the naked ID
8963d9c2 220 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
9291475f 221 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
c5e8d7af
PH
222 (?(1).+)? # if we found the ID, everything can follow
223 $"""
c5e8d7af 224 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26
PH
225 _formats = {
226 '5': {'ext': 'flv', 'width': 400, 'height': 240},
227 '6': {'ext': 'flv', 'width': 450, 'height': 270},
228 '13': {'ext': '3gp'},
229 '17': {'ext': '3gp', 'width': 176, 'height': 144},
230 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
231 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
232 '34': {'ext': 'flv', 'width': 640, 'height': 360},
233 '35': {'ext': 'flv', 'width': 854, 'height': 480},
234 '36': {'ext': '3gp', 'width': 320, 'height': 240},
235 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
236 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
237 '43': {'ext': 'webm', 'width': 640, 'height': 360},
238 '44': {'ext': 'webm', 'width': 854, 'height': 480},
239 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
240 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
c9bebed2
S
241 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
242 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
2c62dc26 243
1d043b93 244
86fe61c8 245 # 3d videos
43b81eb9
PH
246 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
247 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
248 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
249 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
250 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
251 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
252 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
836a086c 253
96fb5605 254 # Apple HTTP Live Streaming
43b81eb9
PH
255 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
256 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
257 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
258 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
259 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
260 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
261 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
2c62dc26
PH
262
263 # DASH mp4 video
43b81eb9
PH
264 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
265 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
266 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
267 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
268 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
e65566a9 269 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
43b81eb9
PH
270 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
271 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
0d2c1418
PH
272 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
273 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
274 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
836a086c 275
f6f1fc92 276 # Dash mp4 audio
62cd676c
PH
277 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
278 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
279 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
836a086c
AZ
280
281 # Dash webm
e75cafe9
A
282 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
283 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
284 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
285 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
286 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
287 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
1cc887cb 288 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
e75cafe9
A
289 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
290 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
291 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
292 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
293 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
294 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
295 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
3c80377b 296 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
18061bba 297 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
a51d3aa0
PH
298 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
299 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
76b3c610 300 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
10a404c3 301 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
76b3c610 302 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
2c62dc26
PH
303
304 # Dash webm audio
55db73ef 305 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
e75cafe9 306 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
ce6b9a2d 307
0857baad
PH
308 # Dash webm audio with opus inside
309 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
310 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
311 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
312
ce6b9a2d
PH
313 # RTMP (unnamed)
314 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 315 }
836a086c 316
78caa52a 317 IE_NAME = 'youtube'
2eb88d95
PH
318 _TESTS = [
319 {
4bc3a23e
PH
320 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
321 'info_dict': {
322 'id': 'BaW_jenozKc',
323 'ext': 'mp4',
324 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
325 'uploader': 'Philipp Hagemeister',
326 'uploader_id': 'phihag',
327 'upload_date': '20121002',
328 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
329 'categories': ['Science & Technology'],
3e7c1224
PH
330 'like_count': int,
331 'dislike_count': int,
2eb88d95 332 }
0e853ca4 333 },
0e853ca4 334 {
4bc3a23e
PH
335 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
336 'note': 'Test generic use_cipher_signature video (#897)',
337 'info_dict': {
338 'id': 'UxxajLWwzqY',
339 'ext': 'mp4',
340 'upload_date': '20120506',
341 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
342 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
343 'uploader': 'Icona Pop',
344 'uploader_id': 'IconaPop',
2eb88d95 345 }
c108eb73
JMF
346 },
347 {
4bc3a23e
PH
348 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
349 'note': 'Test VEVO video with age protection (#956)',
350 'info_dict': {
351 'id': '07FYdnEawAQ',
352 'ext': 'mp4',
353 'upload_date': '20130703',
354 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
355 'description': 'md5:64249768eec3bc4276236606ea996373',
356 'uploader': 'justintimberlakeVEVO',
357 'uploader_id': 'justintimberlakeVEVO',
c108eb73
JMF
358 }
359 },
fccd3771 360 {
4bc3a23e
PH
361 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
362 'note': 'Embed-only video (#1746)',
363 'info_dict': {
364 'id': 'yZIXLfi8CZQ',
365 'ext': 'mp4',
366 'upload_date': '20120608',
367 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
368 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
369 'uploader': 'SET India',
370 'uploader_id': 'setindia'
fccd3771
PH
371 }
372 },
dd27fd17 373 {
4bc3a23e
PH
374 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
375 'note': '256k DASH audio (format 141) via DASH manifest',
376 'info_dict': {
377 'id': 'a9LDPn-MO4I',
378 'ext': 'm4a',
379 'upload_date': '20121002',
380 'uploader_id': '8KVIDEO',
381 'description': '',
382 'uploader': '8KVIDEO',
383 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 384 },
4bc3a23e
PH
385 'params': {
386 'youtube_include_dash_manifest': True,
387 'format': '141',
4919603f 388 },
dd27fd17 389 },
3489b7d2
JMF
390 # DASH manifest with encrypted signature
391 {
78caa52a
PH
392 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
393 'info_dict': {
394 'id': 'IB3lcPjvWLA',
395 'ext': 'm4a',
b766eb27
JMF
396 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
397 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
78caa52a
PH
398 'uploader': 'AfrojackVEVO',
399 'uploader_id': 'AfrojackVEVO',
400 'upload_date': '20131011',
3489b7d2 401 },
4bc3a23e 402 'params': {
78caa52a
PH
403 'youtube_include_dash_manifest': True,
404 'format': '141',
3489b7d2
JMF
405 },
406 },
aaeb86f6
S
407 # JS player signature function name containing $
408 {
409 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
410 'info_dict': {
411 'id': 'nfWlot6h_JM',
412 'ext': 'm4a',
413 'title': 'Taylor Swift - Shake It Off',
414 'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
415 'uploader': 'TaylorSwiftVEVO',
416 'uploader_id': 'TaylorSwiftVEVO',
417 'upload_date': '20140818',
418 },
419 'params': {
420 'youtube_include_dash_manifest': True,
421 'format': '141',
422 },
423 },
aa79ac0c
PH
424 # Controversy video
425 {
426 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
427 'info_dict': {
428 'id': 'T4XJQO3qol8',
429 'ext': 'mp4',
430 'upload_date': '20100909',
431 'uploader': 'The Amazing Atheist',
432 'uploader_id': 'TheAmazingAtheist',
433 'title': 'Burning Everyone\'s Koran',
434 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
435 }
c522adb1
JMF
436 },
437 # Normal age-gate video (No vevo, embed allowed)
438 {
439 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
440 'info_dict': {
441 'id': 'HtVdAasjOgU',
442 'ext': 'mp4',
443 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
9ed99402 444 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
c522adb1
JMF
445 'uploader': 'The Witcher',
446 'uploader_id': 'WitcherGame',
447 'upload_date': '20140605',
448 },
449 },
fccae2b9
S
450 # Age-gate video with encrypted signature
451 {
452 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
453 'info_dict': {
454 'id': '6kLq3WMV1nU',
455 'ext': 'mp4',
456 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
457 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
458 'uploader': 'LloydVEVO',
459 'uploader_id': 'LloydVEVO',
460 'upload_date': '20110629',
461 },
462 },
774e208f
PH
463 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
464 {
465 'url': '__2ABJjxzNo',
466 'info_dict': {
467 'id': '__2ABJjxzNo',
468 'ext': 'mp4',
469 'upload_date': '20100430',
470 'uploader_id': 'deadmau5',
471 'description': 'md5:12c56784b8032162bb936a5f76d55360',
472 'uploader': 'deadmau5',
473 'title': 'Deadmau5 - Some Chords (HD)',
474 },
475 'expected_warnings': [
476 'DASH manifest missing',
477 ]
e52a40ab
PH
478 },
479 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
480 {
481 'url': 'lqQg6PlCWgI',
482 'info_dict': {
483 'id': 'lqQg6PlCWgI',
484 'ext': 'mp4',
cbe2bd91
PH
485 'upload_date': '20120731',
486 'uploader_id': 'olympic',
487 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
488 'uploader': 'Olympics',
489 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
490 },
491 'params': {
492 'skip_download': 'requires avconv',
e52a40ab 493 }
cbe2bd91 494 },
6271f1ca
PH
495 # Non-square pixels
496 {
497 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
498 'info_dict': {
499 'id': '_b-2C3KPAM0',
500 'ext': 'mp4',
501 'stretched_ratio': 16 / 9.,
502 'upload_date': '20110310',
503 'uploader_id': 'AllenMeow',
504 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
505 'uploader': '孫艾倫',
506 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
507 },
06b491eb
S
508 },
509 # url_encoded_fmt_stream_map is empty string
510 {
511 'url': 'qEJwOuvDf7I',
512 'info_dict': {
513 'id': 'qEJwOuvDf7I',
514 'ext': 'mp4',
515 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
516 'description': '',
517 'upload_date': '20150404',
518 'uploader_id': 'spbelect',
519 'uploader': 'Наблюдатели Петербурга',
520 },
521 'params': {
522 'skip_download': 'requires avconv',
523 }
524 },
da77d856
S
525 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
526 {
527 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
528 'info_dict': {
529 'id': 'FIl7x6_3R5Y',
530 'ext': 'mp4',
531 'title': 'md5:7b81415841e02ecd4313668cde88737a',
532 'description': 'md5:116377fd2963b81ec4ce64b542173306',
533 'upload_date': '20150625',
534 'uploader_id': 'dorappi2000',
535 'uploader': 'dorappi2000',
536 'formats': 'mincount:33',
537 },
2ee8f5d8 538 },
8a1a26ce
YCH
539 # DASH manifest with segment_list
540 {
541 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
542 'md5': '8ce563a1d667b599d21064e982ab9e31',
543 'info_dict': {
544 'id': 'CsmdDsKjzN8',
545 'ext': 'mp4',
17ee98e1 546 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
547 'uploader': 'Airtek',
548 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
549 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
550 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
551 },
552 'params': {
553 'youtube_include_dash_manifest': True,
554 'format': '135', # bestvideo
555 }
2ee8f5d8 556 },
2eb88d95
PH
557 ]
558
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialize the extractor, forwarding all arguments to the parent class."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions keyed by (player_url, signature cache id);
    # populated by _decrypt_signature
    self._player_cache = {}
e0df6211 562
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a status line announcing the video info webpage download for video_id."""
    status_line = '%s: Downloading video info webpage' % video_id
    self.to_screen(status_line)
c5e8d7af 566
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a status line announcing that information for video_id is being extracted."""
    status_line = '%s: Extracting video information' % video_id
    self.to_screen(status_line)
c5e8d7af
PH
570
def report_unavailable_format(self, video_id, format):
    """Print a status line saying that the given format is not available for video_id."""
    status_line = '%s: Format %s not available' % (video_id, format)
    self.to_screen(status_line)
c5e8d7af
PH
574
def report_rtmp_download(self):
    """Print a status line noting that an RTMP download was detected."""
    status_line = 'RTMP download detected'
    self.to_screen(status_line)
c5e8d7af 578
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of example_sig, joined by dots."""
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
582
583 def _extract_signature_function(self, video_id, player_url, example_sig):
cf010131 584 id_m = re.match(
60620368 585 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
cf010131 586 player_url)
c081b35c
PH
587 if not id_m:
588 raise ExtractorError('Cannot identify player %r' % player_url)
e0df6211
PH
589 player_type = id_m.group('ext')
590 player_id = id_m.group('id')
591
c4417ddb 592 # Read from filesystem cache
60064c53
PH
593 func_id = '%s_%s_%s' % (
594 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 595 assert os.path.basename(func_id) == func_id
a0e07d31 596
69ea8ca4 597 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 598 if cache_spec is not None:
78caa52a 599 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 600
6d1a55a5
PH
601 download_note = (
602 'Downloading player %s' % player_url
603 if self._downloader.params.get('verbose') else
604 'Downloading %s player %s' % (player_type, player_id)
605 )
e0df6211
PH
606 if player_type == 'js':
607 code = self._download_webpage(
608 player_url, video_id,
6d1a55a5 609 note=download_note,
69ea8ca4 610 errnote='Download of %s failed' % player_url)
83799698 611 res = self._parse_sig_js(code)
c4417ddb 612 elif player_type == 'swf':
e0df6211
PH
613 urlh = self._request_webpage(
614 player_url, video_id,
6d1a55a5 615 note=download_note,
69ea8ca4 616 errnote='Download of %s failed' % player_url)
e0df6211 617 code = urlh.read()
83799698 618 res = self._parse_sig_swf(code)
e0df6211
PH
619 else:
620 assert False, 'Invalid player type %r' % player_type
621
785521bf
PH
622 test_string = ''.join(map(compat_chr, range(len(example_sig))))
623 cache_res = res(test_string)
624 cache_spec = [ord(c) for c in cache_res]
83799698 625
69ea8ca4 626 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
627 return res
628
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to func for signatures shaped like example_sig.

    func is applied to a probe string whose i-th character has code
    point i; the code points of the result are the input indices in
    output order. Those indices are rendered as a '+'-joined expression
    of indexing and slicing operations on ``s`` and written out with
    to_screen.
    """
    def gen_sig_code(idxs):
        """Yield 's[...]' fragments covering idxs, merging +1/-1 runs into slices."""
        def _genslice(start, end, step):
            """Format the run from start to end (inclusive) as a slice of s."""
            starts = '' if start == 0 else str(start)
            # The slice end is exclusive, so go one step past `end`; a
            # negative bound is left open instead
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress, else the run's +1/-1 stride
        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev: emit it and start afresh
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 667
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in the JS player code.

    The function name is located with a regular expression and the
    function itself is extracted with JSInterpreter; the returned
    callable passes its single argument to it as a one-element list.
    """
    name_pattern = r'\.sig\|\|([a-zA-Z0-9$]+)\('
    funcname = self._search_regex(
        name_pattern, jscode,
        'Initial JS player signature function name')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def decipher(s):
        return initial_function([s])

    return decipher
676
def _parse_sig_swf(self, file_contents):
    """Return a callable wrapping 'decipher' of the SWF player's SignatureDecipher class.

    The returned callable passes its single argument to the extracted
    function as a one-element list.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    initial_function = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        return initial_function([s])

    return decipher
683
83799698 684 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 685 """Turn the encrypted s field into a working signature"""
6b37f0be 686
c8bf86d5 687 if player_url is None:
69ea8ca4 688 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 689
69ea8ca4 690 if player_url.startswith('//'):
78caa52a 691 player_url = 'https:' + player_url
c8bf86d5 692 try:
62af3a0e 693 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
694 if player_id not in self._player_cache:
695 func = self._extract_signature_function(
60064c53 696 video_id, player_url, s
c8bf86d5
PH
697 )
698 self._player_cache[player_id] = func
699 func = self._player_cache[player_id]
700 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 701 self._print_sig_code(func, s)
c8bf86d5
PH
702 return func(s)
703 except Exception as e:
704 tb = traceback.format_exc()
705 raise ExtractorError(
78caa52a 706 'Signature extraction failed: ' + tb, cause=e)
e0df6211 707
def _get_subtitles(self, video_id, webpage):
    """Return the subtitle tracks listed by the timedtext service.

    Downloads the track list for ``video_id`` and, for the first track of
    every language code, builds one entry per format (sbv, vtt, srt).

    Returns a dict mapping language code to a list of
    ``{'url': ..., 'ext': ...}`` dicts. An empty dict is returned, after a
    warning, when the list cannot be downloaded or holds no tracks.
    ``webpage`` is not used by this method.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        track_list = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
        return {}

    subtitles = {}
    for track in track_list.findall('track'):
        lang = track.attrib['lang_code']
        # Only the first track listed for a language is kept
        if lang in subtitles:
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse.urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in ['sbv', 'vtt', 'srt']]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
739
def _get_automatic_captions(self, video_id, webpage):
    """Return the automatic captions available for a video.

    The captions URL is read from the ``ytplayer.config`` JSON embedded in
    ``webpage``, which is passed in so that it does not have to be
    downloaded again.

    Returns a dict mapping each target language code to a list of
    ``{'url': ..., 'ext': ...}`` dicts (sbv, vtt, srt). An empty dict is
    returned, after a warning, when the player config is missing, a needed
    key is absent, the caption list has no ``track`` element, or the list
    download raises ExtractorError.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    config_match = re.search(r';ytplayer.config = ({.*?});', webpage)
    if config_match is None:
        self._downloader.report_warning(err_msg)
        return {}
    player_config = json.loads(config_match.group(1))
    try:
        player_args = player_config['args']
        caption_url = player_args['ttsurl']
        timestamp = player_args['timestamp']
        # Ask for the list of available tracks and translation targets
        list_url = caption_url + '&' + compat_urllib_parse.urlencode({
            'type': 'list',
            'tlangs': 1,
            'asrs': 1,
        })
        caption_list = self._download_xml(list_url, video_id)
        source_track = caption_list.find('track')
        if source_track is None:
            self._downloader.report_warning('Video doesn\'t have automatic captions')
            return {}
        original_lang = source_track.attrib['lang_code']
        caption_kind = source_track.attrib.get('kind', '')

        captions = {}
        for target in caption_list.findall('target'):
            sub_lang = target.attrib['lang_code']
            captions[sub_lang] = [{
                'url': caption_url + '&' + compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': ext,
                    'ts': timestamp,
                    'kind': caption_kind,
                }),
                'ext': ext,
            } for ext in ['sbv', 'vtt', 'srt']]
        return captions
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
792
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id captured by the second group of ``_VALID_URL``.

    The pattern is matched in verbose mode against the start of ``url``.
    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
800
1d043b93
JMF
801 def _extract_from_m3u8(self, manifest_url, video_id):
802 url_map = {}
5f6a1245 803
1d043b93
JMF
804 def _get_urls(_manifest):
805 lines = _manifest.split('\n')
806 urls = filter(lambda l: l and not l.startswith('#'),
8bcc8756 807 lines)
1d043b93 808 return urls
78caa52a 809 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1d043b93
JMF
810 formats_urls = _get_urls(manifest)
811 for format_url in formats_urls:
890f62e8 812 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1d043b93
JMF
813 url_map[itag] = format_url
814 return url_map
815
1fb07d10
JG
816 def _extract_annotations(self, video_id):
817 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
69ea8ca4 818 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1fb07d10 819
da276600 820 def _parse_dash_manifest(
77c6fb5b 821 self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
774e208f
PH
822 def decrypt_sig(mobj):
823 s = mobj.group(1)
824 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
825 return '/signature/%s' % dec_s
e1b9322b 826 dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
774e208f
PH
827 dash_doc = self._download_xml(
828 dash_manifest_url, video_id,
829 note='Downloading DASH manifest',
77c6fb5b
S
830 errnote='Could not download DASH manifest',
831 fatal=fatal)
832
833 if dash_doc is False:
834 return []
774e208f
PH
835
836 formats = []
de5c5456
YCH
837 for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
838 mime_type = a.attrib.get('mimeType')
839 for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
840 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
841 if url_el is None:
842 continue
843 if mime_type == 'text/vtt':
844 # TODO implement WebVTT downloading
845 pass
846 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
6800d337 847 segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
de5c5456
YCH
848 format_id = r.attrib['id']
849 video_url = url_el.text
850 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
851 f = {
852 'format_id': format_id,
853 'url': video_url,
854 'width': int_or_none(r.attrib.get('width')),
855 'height': int_or_none(r.attrib.get('height')),
856 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
857 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
858 'filesize': filesize,
859 'fps': int_or_none(r.attrib.get('frameRate')),
860 }
0c8662d2 861 if segment_list is not None:
6800d337
YCH
862 f.update({
863 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
b9258c61 864 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
423d2be5 865 'protocol': 'http_dash_segments',
6800d337 866 })
de5c5456
YCH
867 try:
868 existing_format = next(
869 fo for fo in formats
870 if fo['format_id'] == format_id)
871 except StopIteration:
872 full_info = self._formats.get(format_id, {}).copy()
873 full_info.update(f)
1b5a1ae2
S
874 codecs = r.attrib.get('codecs')
875 if codecs:
876 if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
877 full_info['vcodec'] = codecs
878 elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
879 full_info['acodec'] = codecs
de5c5456
YCH
880 formats.append(full_info)
881 else:
882 existing_format.update(f)
883 else:
884 self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
774e208f
PH
885 return formats
886
def _real_extract(self, url):
    """Extract metadata and the available formats for a single video.

    The watch page is downloaded first. For age-gated videos the video
    info is fetched through the embed page; otherwise it is read from the
    ``ytplayer.config`` JSON in the watch page and/or from the
    ``get_video_info`` endpoint. Formats come from an RTMP connection,
    the encoded URL maps or an HLS manifest, and are complemented with the
    formats of every DASH manifest found, unless
    ``youtube_include_dash_manifest`` is disabled.

    Returns the info dict for the video. Raises ExtractorError when the
    video info carries no ``token``, for rental videos without an author,
    when the uploader name is missing, for rtmpe streams, and when no
    format source is present.
    """
    proto = (
        'http' if self._downloader.params.get('prefer_insecure', False)
        else 'https')

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
    video_id = self.extract_id(url)

    # Get video webpage
    url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
    video_webpage = self._download_webpage(url, video_id)

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    dash_mpds = []

    def add_dash_mpd(video_info):
        # Collect each distinct DASH manifest URL once, in discovery order
        dash_mpd = video_info.get('dashmpd')
        if dash_mpd and dash_mpd[0] not in dash_mpds:
            dash_mpds.append(dash_mpd[0])

    # Get video info
    embed_webpage = None
    is_live = None
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
        data = compat_urllib_parse.urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': self._search_regex(
                r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
        })
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note='Refetching age-gated info webpage',
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        add_dash_mpd(video_info)
    else:
        age_gate = False
        video_info = None
        # Try looking directly into the video webpage
        mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
        if mobj:
            json_code = uppercase_escape(mobj.group(1))
            ytplayer_config = json.loads(json_code)
            args = ytplayer_config['args']
            if args.get('url_encoded_fmt_stream_map'):
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                add_dash_mpd(video_info)
            if args.get('livestream') == '1' or args.get('live_playback') == 1:
                is_live = True
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = (
                    '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (proto, video_id, el_type))
                video_info_webpage = self._download_webpage(
                    video_info_url,
                    video_id, note=False,
                    errnote='unable to download video info webpage')
                get_video_info = compat_parse_qs(video_info_webpage)
                add_dash_mpd(get_video_info)
                if not video_info:
                    video_info = get_video_info
                if 'token' in get_video_info:
                    break
    if 'token' not in video_info:
        if 'reason' in video_info:
            if 'The uploader has not made this video available in your country.' in video_info['reason']:
                regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
                if regions_allowed is not None:
                    raise ExtractorError('YouTube said: This video is available in %s only' % (
                        ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
                        expected=True)
            raise ExtractorError(
                'YouTube said: %s' % video_info['reason'][0],
                expected=True, video_id=video_id)
        else:
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',
                video_id=video_id)

    if 'view_count' in video_info:
        view_count = int(video_info['view_count'][0])
    else:
        view_count = None

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError('Unable to extract uploader name')
    video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group(1)
    else:
        self._downloader.report_warning('unable to extract uploader nickname')

    # title
    if 'title' in video_info:
        video_title = video_info['title'][0]
    else:
        self._downloader.report_warning('Unable to extract video title')
        video_title = '_'

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning('unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = self._html_search_meta(
        'datePublished', video_webpage, 'upload date', default=None)
    if not upload_date:
        upload_date = self._search_regex(
            [r'(?s)id="eow-date.*?>(.*?)</span>',
             r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
            video_webpage, 'upload date', default=None)
        if upload_date:
            # Normalize separators of the date text that was just found.
            # This must operate on upload_date itself: `mobj` at this point
            # is the unrelated uploader-id match and may be None.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', upload_date).split())
    upload_date = unified_strdate(upload_date)

    m_cat_container = self._search_regex(
        r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
        video_webpage, 'categories', default=None)
    if m_cat_container:
        category = self._html_search_regex(
            r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
            default=None)
        video_categories = None if category is None else [category]
    else:
        video_categories = None

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        # Replace shortened redirect links by the full URL kept in "title"
        video_description = re.sub(r'''(?x)
            <a\s+
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                title="([^"]+)"\s+
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                class="yt-uix-redirect-link"\s*>
            [^<]+
            </a>
        ''', r'\1', video_description)
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = ''

    def _extract_count(count_name):
        return str_to_int(self._search_regex(
            r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
            % re.escape(count_name),
            video_webpage, count_name, default=None))

    like_count = _extract_count('like')
    dislike_count = _extract_count('dislike')

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)
    automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

    if 'length_seconds' not in video_info:
        self._downloader.report_warning('unable to extract video duration')
        video_duration = None
    else:
        video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    def _map_to_format_list(urlmap):
        formats = []
        for itag, video_real_url in urlmap.items():
            dct = {
                'format_id': itag,
                'url': video_real_url,
                'player_url': player_url,
            }
            if itag in self._formats:
                dct.update(self._formats[itag])
            formats.append(dct)
        return formats

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        formats = [{
            'format_id': '_rtmp',
            'protocol': 'rtmp',
            'url': video_info['conn'][0],
            'player_url': player_url,
        }]
    elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        url_map = {}
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' not in url_data or 'url' not in url_data:
                continue
            format_id = url_data['itag'][0]
            url = url_data['url'][0]

            if 'sig' in url_data:
                url += '&signature=' + url_data['sig'][0]
            elif 's' in url_data:
                encrypted_sig = url_data['s'][0]
                ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

                jsplayer_url_json = self._search_regex(
                    ASSETS_RE,
                    embed_webpage if age_gate else video_webpage,
                    'JS player URL (1)', default=None)
                if not jsplayer_url_json and not age_gate:
                    # We need the embed website after all
                    if embed_webpage is None:
                        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                        embed_webpage = self._download_webpage(
                            embed_url, video_id, 'Downloading embed webpage')
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE, embed_webpage, 'JS player URL')

                player_url = json.loads(jsplayer_url_json)
                if player_url is None:
                    player_url_json = self._search_regex(
                        r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                        video_webpage, 'age gate player URL')
                    player_url = json.loads(player_url_json)

                if self._downloader.params.get('verbose'):
                    if player_url is None:
                        player_version = 'unknown'
                        player_desc = 'unknown'
                    else:
                        if player_url.endswith('swf'):
                            player_version = self._search_regex(
                                r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                r'html5player-([^/]+?)(?:/html5player)?\.js',
                                player_url,
                                'html5 player', fatal=False)
                            player_desc = 'html5 player %s' % player_version

                    parts_sizes = self._signature_cache_id(encrypted_sig)
                    self.to_screen('{%s} signature length %s, %s' %
                                   (format_id, parts_sizes, player_desc))

                signature = self._decrypt_signature(
                    encrypted_sig, video_id, player_url, age_gate)
                url += '&signature=' + signature
            if 'ratebypass' not in url:
                url += '&ratebypass=yes'
            url_map[format_id] = url
        formats = _map_to_format_list(url_map)
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        formats = _map_to_format_list(url_map)
    else:
        raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # Look for the DASH manifest
    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_mpd_fatal = True
        for dash_manifest_url in dash_mpds:
            dash_formats = {}
            try:
                for df in self._parse_dash_manifest(
                        video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
                    # Do not overwrite DASH format found in some previous DASH manifest
                    if df['format_id'] not in dash_formats:
                        dash_formats[df['format_id']] = df
                # Additional DASH manifests may end up in HTTP Error 403 therefore
                # allow them to fail without bug report message if we already have
                # some DASH manifest succeeded. This is temporary workaround to reduce
                # burst of bug reports until we figure out the reason and whether it
                # can be fixed at all.
                dash_mpd_fatal = False
            except (ExtractorError, KeyError) as e:
                self.report_warning(
                    'Skipping DASH manifest: %r' % e, video_id)
            if dash_formats:
                # Remove the formats we found through non-DASH, they
                # contain less info and it can be wrong, because we use
                # fixed values (for example the resolution). See
                # https://github.com/rg3/youtube-dl/issues/5774 for an
                # example.
                formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
                formats.extend(dash_formats.values())

    # Check for malformed aspect ratio
    stretched_m = re.search(
        r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
        video_webpage)
    if stretched_m:
        stretch_w = float(stretched_m.group('w'))
        stretch_h = float(stretched_m.group('h'))
        # The tag may hold an unusable ratio such as "17:0"; only apply
        # ratios with a positive width and height (avoids dividing by zero)
        if stretch_w > 0 and stretch_h > 0:
            ratio = stretch_w / stretch_h
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

    self._sort_formats(formats)

    return {
        'id': video_id,
        'uploader': video_uploader,
        'uploader_id': video_uploader_id,
        'upload_date': upload_date,
        'title': video_title,
        'thumbnail': video_thumbnail,
        'description': video_description,
        'categories': video_categories,
        'subtitles': video_subtitles,
        'automatic_captions': automatic_captions,
        'duration': video_duration,
        'age_limit': 18 if age_gate else 0,
        'annotations': video_annotations,
        'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
        'formats': formats,
        'is_live': is_live,
    }
5f6a1245 1260
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube playlists, including mixes.

    Accepts playlist URLs (``playlist``, ``watch``, ``embed/videoseries``,
    ``p/`` and related paths carrying a ``p``, ``a`` or ``list`` parameter)
    as well as bare playlist ids with a known prefix.
    """
    IE_DESC = 'YouTube.com playlists'
    # Group 1 holds the id found inside a URL, group 2 a bare playlist id
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Key was misspelled 'playlist_mincout', so this check never ran
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        """Log in before extracting."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Return the playlist result for a mix.

        The watch page of the seed video is downloaded with the mix as its
        ``list`` parameter; the title and the ids of the listed videos are
        scraped from that page.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # Try the known title containers from most to least specific
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Return the playlist result for a regular playlist.

        Entries are produced lazily: further pages are only requested
        through the "load more" link while the entries are consumed.

        Raises ExtractorError when the page reports that the playlist does
        not exist, is private, or was requested with invalid parameters.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        # Extract the video ids from the playlist pages
        def _entries():
            more_widget_html = content_html = page
            for page_num in itertools.count(1):
                matches = re.finditer(self._VIDEO_RE, content_html)
                # We remove the duplicates and the link with index 0
                # (it's not the first video of the playlist)
                new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
                for vid_id in new_ids:
                    yield self.url_result(vid_id, 'Youtube', video_id=vid_id)

                mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                    'Downloading page #%s' % page_num,
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                if not content_html.strip():
                    # Some webpages show a "Load more" button but they don't
                    # have more videos
                    break
                more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(_entries(), playlist_id, playlist_title)

    def _real_extract(self, url):
        """Dispatch a playlist URL to the matching extraction routine.

        A URL that also names a video (``v`` parameter) yields just that
        video when the ``noplaylist`` option is set. Ids starting with
        ``RD`` or ``UL`` are handled as mixes, everything else as a
        regular playlist. Raises ExtractorError for a non-matching URL.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1453
c5e8d7af
PH
1454
1455class YoutubeChannelIE(InfoExtractor):
78caa52a 1456 IE_DESC = 'YouTube.com channels'
9ff67727 1457 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
eb0f3e7e 1458 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
78caa52a 1459 IE_NAME = 'youtube:channel'
cdc628a4
PH
1460 _TESTS = [{
1461 'note': 'paginated channel',
1462 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1463 'playlist_mincount': 91,
acf757f4
PH
1464 'info_dict': {
1465 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
1466 }
cdc628a4 1467 }]
c5e8d7af 1468
6de5dbaf
S
@staticmethod
def extract_videos_from_page(page):
    """Collect the videos linked from a channel page.

    Every ``/watch?v=<id>`` link is considered, together with the optional
    ``title`` attribute found in front of it. Each id is reported once, in
    order of first appearance; a later occurrence only supplies the title
    when none has been recorded for that id yet.

    Returns the zipped (video id, title) pairs.
    """
    link_re = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    video_ids = []
    video_titles = []
    for match in re.finditer(link_re, page):
        vid = match.group('id')
        title = unescapeHTML(match.group('title'))
        if vid not in video_ids:
            video_ids.append(vid)
            video_titles.append(title)
            continue
        # Already known: fill in the title only if it is still missing
        pos = video_ids.index(vid)
        if title and not video_titles[pos]:
            video_titles[pos] = title
    return zip(video_ids, video_titles)
c5e8d7af
PH
1484
1485 def _real_extract(self, url):
9ff67727 1486 channel_id = self._match_id(url)
c5e8d7af 1487
eb0f3e7e 1488 url = self._TEMPLATE_URL % channel_id
386bdfa6
S
1489
1490 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1491 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1492 # otherwise fallback on channel by page extraction
1493 channel_page = self._download_webpage(
1494 url + '?view=57', channel_id,
1495 'Downloading channel page', fatal=False)
3d8e9573
S
1496 channel_playlist_id = self._html_search_meta(
1497 'channelId', channel_page, 'channel id', default=None)
1498 if not channel_playlist_id:
1499 channel_playlist_id = self._search_regex(
1500 r'data-channel-external-id="([^"]+)"',
1501 channel_page, 'channel id', default=None)
386bdfa6
S
1502 if channel_playlist_id and channel_playlist_id.startswith('UC'):
1503 playlist_id = 'UU' + channel_playlist_id[2:]
d2a9de78
IK
1504 return self.url_result(
1505 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
386bdfa6 1506
60bf45c8 1507 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
31812a9e
PH
1508 autogenerated = re.search(r'''(?x)
1509 class="[^"]*?(?:
1510 channel-header-autogenerated-label|
1511 yt-channel-title-autogenerated
1512 )[^"]*"''', channel_page) is not None
c5e8d7af 1513
b9643eed
JMF
1514 if autogenerated:
1515 # The videos are contained in a single page
1516 # the ajax pages can't be used, they are empty
b82f815f 1517 entries = [
fb69240c
S
1518 self.url_result(
1519 video_id, 'Youtube', video_id=video_id,
1520 video_title=video_title)
8f02ad4f 1521 for video_id, video_title in self.extract_videos_from_page(channel_page)]
b82f815f
PH
1522 return self.playlist_result(entries, channel_id)
1523
1524 def _entries():
23d3608c 1525 more_widget_html = content_html = channel_page
b9643eed 1526 for pagenum in itertools.count(1):
81c2f20b 1527
8f02ad4f 1528 for video_id, video_title in self.extract_videos_from_page(content_html):
b82f815f 1529 yield self.url_result(
fb69240c
S
1530 video_id, 'Youtube', video_id=video_id,
1531 video_title=video_title)
5f6a1245 1532
23d3608c
JMF
1533 mobj = re.search(
1534 r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
1535 more_widget_html)
1536 if not mobj:
b9643eed 1537 break
c5e8d7af 1538
23d3608c
JMF
1539 more = self._download_json(
1540 'https://youtube.com/%s' % mobj.group('more'), channel_id,
1541 'Downloading page #%s' % (pagenum + 1),
1542 transform_source=uppercase_escape)
1543 content_html = more['content_html']
1544 more_widget_html = more['load_more_widget_html']
1545
b82f815f 1546 return self.playlist_result(_entries(), channel_id)
c5e8d7af
PH
1547
1548
class YoutubeUserIE(YoutubeChannelIE):
    # Same extraction as for channels, but addressed through /user/ URLs
    # (or the "ytuser:" keyword) and a user-specific page template.
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept url only when no other extractor of this module claims it."""
        # The regex of this extractor is too permissive and would also match
        # URLs that another youtube extractor can handle, so give every other
        # "...IE" name found in the module globals the first say.
        for name, klass in globals().items():
            if not name.endswith('IE') or klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
f4b05232 1575
b05654f0 1576
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    # Runs a search through the results pages and returns the hits as a
    # playlist; reachable through the "ytsearch" search key.
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until a page yields no
        video links or at least n results have been collected; the list is
        then cut down to n entries and returned as a playlist titled query.

        Raises ExtractorError when a page contains the "search-message"
        marker, which is reported as "No video results".
        """

        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            # Subclasses add e.g. a sort order through _EXTRA_QUERY_ARGS
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are available: ">=" rather than
            # ">" avoids downloading one more page whose results would be
            # thrown away when exactly n results have been collected.
            if not new_videos or len(videos) >= n:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 1620
c9ae7b95 1621
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, with an extra query argument that
    # asks for the results sorted by upload date.
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 1627
c9ae7b95
PH
1628
class YoutubeSearchURLIE(InfoExtractor):
    # Turns the first page of a search results URL into a playlist of URLs.
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        """Return a playlist dict, titled with the decoded query, of result URLs."""
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        # Only the ordered list that holds the result items is of interest
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        def _build_entry(part_code):
            # The title is optional, the link target is mandatory
            title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            return {
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', url_snippet),
                'title': title,
            }

        # Every result item is wrapped in a "yt-lockup-title" heading
        headings = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        entries = [_build_entry(heading) for heading in headings]

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1670
1671
class YoutubeShowIE(InfoExtractor):
    # Maps a show page to a playlist whose entries are the season playlists.
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Return a playlist dict with one playlist URL entry per season link."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')

        # There's one playlist for each season of the show
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(season_paths)))

        entries = []
        for season_path in season_paths:
            entries.append(self.url_result(
                'https://www.youtube.com' + season_path, 'YoutubePlaylist'))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage, fatal=False),
            'entries': entries,
        }
04cc9617
JMF
1706
1707
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name of the subclass."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist.

        The url argument is not used: the page that gets downloaded is
        built from _FEED_NAME.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            #
            # This has to be a real list: on Python 3 filter() returns a lazy
            # iterator that is always truthy, so the emptiness check below
            # would never stop the loop.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
1755
1756
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    # The watch later list is extracted as the playlist with the id "WL".
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    _TESTS = []  # override PlaylistIE tests

    def _real_extract(self, url):
        """Extract the fixed "WL" playlist, whatever form the URL had."""
        watch_later_id = 'WL'
        return self._extract_playlist(watch_later_id)
f459d170 1766
5f6a1245 1767
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Looks up the playlist id referenced by the favourites page and hands
    # it over to the playlist extractor.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a URL result for the playlist id found on the favourites page."""
        favourites_url = 'https://www.youtube.com/my_favorites'
        webpage = self._download_webpage(favourites_url, 'Youtube Favourites videos')
        # The id is whatever follows "list=" up to the next quote or ampersand
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
1778
1779
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configured for the "recommended" feed.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1ed5b5c9 1785
1ed5b5c9 1786
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configured for the "subscriptions" feed.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1ed5b5c9 1792
1ed5b5c9 1793
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configured for the "history" feed.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string, like every other _VALID_URL in this file: "\." in a plain
    # string literal is an invalid escape sequence (the pattern is unchanged)
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
1ed5b5c9
JMF
1799
1800
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution URLs that end right after a single query
    # parameter (or right after "watch?") and answers them with a hint
    # about quoting the URL instead of extracting anything.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining shell quoting."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
772fd5cc
PH
1844
1845
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose video id has only 1 to 10 characters and
    # reports them as truncated instead of extracting anything.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)