]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/common.py
[soundcloud/generic] Add support for playlists
[yt-dlp.git] / youtube_dl / extractor / common.py
CommitLineData
d6983cb4 1import base64
3ec05685 2import hashlib
3d3538e4 3import json
d6983cb4
PH
4import os
5import re
6import socket
7import sys
fc79158d 8import netrc
267ed0c5 9import xml.etree.ElementTree
d6983cb4
PH
10
11from ..utils import (
12 compat_http_client,
13 compat_urllib_error,
c7deaa4c 14 compat_urllib_parse_urlparse,
d6983cb4
PH
15 compat_str,
16
17 clean_html,
18 compiled_regex_type,
19 ExtractorError,
55b3e45b 20 RegexNotFoundError,
d41e6efc 21 sanitize_filename,
f38de77f 22 unescapeHTML,
d6983cb4 23)
# Sentinel meaning "no default supplied". None cannot be used for this because
# callers pass default=None to the regex helpers to request a None result.
_NO_DEFAULT = object()


class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information about the video (or videos) the URL refers to. This
    information includes the real video URL, the video title, author and
    others. The information is stored in a dictionary which is then
    passed to the FileDownloader. The FileDownloader processes this
    information possibly downloading the video to the file system, among
    other possible outcomes.

    The dictionaries must include the following fields:

    id:             Video identifier.
    title:          Video title, unescaped.

    Additionally, it must contain either a formats entry or a url one:

    formats:        A list of dictionaries for each format available, ordered
                    from worst to best quality.

                    Potential fields:
                    * url        Mandatory. The URL of the video file
                    * ext        Will be calculated from url if missing
                    * format     A human-readable description of the format
                                 ("mp4 container with h264/opus").
                                 Calculated from the format_id, width, height
                                 and format_note fields if missing.
                    * format_id  A short description of the format
                                 ("mp4_h264_opus" or "19").
                                 Technically optional, but strongly recommended.
                    * format_note Additional info about the format
                                 ("3D" or "DASH video")
                    * width      Width of the video, if known
                    * height     Height of the video, if known
                    * resolution Textual description of width and height
                    * tbr        Average bitrate of audio and video in KBit/s
                    * abr        Average audio bitrate in KBit/s
                    * acodec     Name of the audio codec in use
                    * asr        Audio sampling rate in Hertz
                    * vbr        Average video bitrate in KBit/s
                    * vcodec     Name of the video codec in use
                    * container  Name of the container format
                    * filesize   The number of bytes, if known in advance
                    * player_url SWF Player URL (used for rtmpdump).
                    * protocol   The protocol that will be used for the actual
                                 download, lower-case.
                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
                    * preference Order number of this format. If this field is
                                 present and not None, the formats get sorted
                                 by this field, regardless of all other values.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
                    * quality    Order number of the video quality of this
                                 format, irrespective of the file format.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
    url:            Final video URL.
    ext:            Video filename extension.
    format:         The video format, defaults to ext (used for --get-format)
    player_url:     SWF Player URL (used for rtmpdump).

    The following fields are optional:

    display_id      An alternative identifier for the video, not necessarily
                    unique, but available before title. Typically, id is
                    something like "4234987", title "Dancing naked mole rats",
                    and display_id "dancing-naked-mole-rats"
    thumbnails:     A list of dictionaries (with the entries "resolution" and
                    "url") for the varying thumbnails
    thumbnail:      Full URL to a video thumbnail image.
    description:    One-line video description.
    uploader:       Full name of the video uploader.
    timestamp:      UNIX timestamp of the moment the video became available.
    upload_date:    Video upload date (YYYYMMDD).
                    If not explicitly set, calculated from timestamp.
    uploader_id:    Nickname or id of the video uploader.
    location:       Physical location of the video.
    subtitles:      The subtitle file contents as a dictionary in the format
                    {language: subtitles}.
    duration:       Length of the video in seconds, as an integer.
    view_count:     How many users have watched the video on the platform.
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
    comment_count:  Number of comments on the video
    age_limit:      Age restriction for the video, as an integer (years)
    webpage_url:    The url to the video webpage, if given to youtube-dl it
                    should allow to get the same result again. (It will be set
                    by YoutubeDL if it's missing)

    Unless mentioned otherwise, the fields should be Unicode strings.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods and define a _VALID_URL regexp.
    Probably, they should also be added to the list of extractors.

    Finally, the _WORKING attribute should be set to False for broken IEs
    in order to warn the users and skip the tests.
    """

    _ready = False
    _downloader = None
    _WORKING = True

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""

        # This does not use has/getattr intentionally - we want to know whether
        # we have cached the regexp for *this* class, whereas getattr would also
        # match the superclass
        if '_VALID_URL_RE' not in cls.__dict__:
            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
        return cls._VALID_URL_RE.match(url) is not None

    @classmethod
    def working(cls):
        """Getter method for _WORKING."""
        return cls._WORKING

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        if not self._ready:
            self._real_initialize()
            self._ready = True

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass

    @classmethod
    def ie_key(cls):
        """A string for getting the InfoExtractor with get_info_extractor"""
        # Drops the last two characters of the class name (the "IE" suffix
        # that extractor classes are expected to carry).
        return cls.__name__[:-2]

    @property
    def IE_NAME(self):
        """Name used as the '[name]' prefix in screen output."""
        return type(self).__name__[:-2]

    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
        """Open url_or_request through the downloader and return the response handle.

        note:    Progress message. None prints the default "Downloading
                 webpage" message, False prints nothing.
        errnote: Prefix of the error message. None selects a default text;
                 False makes a failed request return False silently,
                 whatever the value of fatal.
        fatal:   If true, a failed request raises ExtractorError; otherwise
                 a warning is reported and False is returned.
        """
        if note is None:
            self.report_download_webpage(video_id)
        elif note is not False:
            if video_id is None:
                self.to_screen(u'%s' % (note,))
            else:
                self.to_screen(u'%s: %s' % (video_id, note))
        try:
            return self._downloader.urlopen(url_or_request)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            if errnote is False:
                return False
            if errnote is None:
                errnote = u'Unable to download webpage'
            errmsg = u'%s: %s' % (errnote, compat_str(err))
            if fatal:
                raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
            else:
                self._downloader.report_warning(errmsg)
                return False

    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
        """ Returns a tuple (page content as string, URL handle)

        Returns False instead when the request failed and was not fatal.
        Raises ExtractorError (expected) when the page is a Websense block page.
        """

        # Strip hashes from the URL (#1038)
        if isinstance(url_or_request, (compat_str, str)):
            url_or_request = url_or_request.partition('#')[0]

        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
        if urlh is False:
            assert not fatal
            return False
        content_type = urlh.headers.get('Content-Type', '')
        webpage_bytes = urlh.read()

        # Encoding detection order: Content-Type header, <meta charset> in
        # the first KiB, UTF-16 little-endian BOM, and finally UTF-8.
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
        if m:
            encoding = m.group(1)
        else:
            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
                          webpage_bytes[:1024])
            if m:
                encoding = m.group(1).decode('ascii')
            elif webpage_bytes.startswith(b'\xff\xfe'):
                encoding = 'utf-16'
            else:
                encoding = 'utf-8'

        params = self._downloader.params
        dump_pages = params.get('dump_intermediate_pages', False)
        write_pages = params.get('write_pages', False)
        if dump_pages or write_pages:
            # Request objects expose get_full_url(); plain strings do not.
            try:
                url = url_or_request.get_full_url()
            except AttributeError:
                url = url_or_request
        if dump_pages:
            self.to_screen(u'Dumping request to ' + url)
            dump = base64.b64encode(webpage_bytes).decode('ascii')
            self._downloader.to_screen(dump)
        if write_pages:
            if len(url) > 200:
                # Keep the file name bounded: truncate the URL and append a
                # hash of the full URL so distinct URLs stay distinct.
                h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
                url = url[:200 - len(h)] + h
            raw_filename = ('%s_%s.dump' % (video_id, url))
            filename = sanitize_filename(raw_filename, restricted=True)
            self.to_screen(u'Saving request to ' + filename)
            with open(filename, 'wb') as outf:
                outf.write(webpage_bytes)

        try:
            content = webpage_bytes.decode(encoding, 'replace')
        except LookupError:
            # The server announced an encoding Python does not know.
            content = webpage_bytes.decode('utf-8', 'replace')

        if (u'<title>Access to this site is blocked</title>' in content and
                u'Websense' in content[:512]):
            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
            blocked_iframe = self._html_search_regex(
                r'<iframe src="([^"]+)"', content,
                u'Websense information URL', default=None)
            if blocked_iframe:
                msg += u' Visit %s for more details' % blocked_iframe
            raise ExtractorError(msg, expected=True)

        return (content, urlh)

    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
        """ Returns the data of the page as a string

        Returns False when the request failed and was not fatal.
        """
        res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
        if res is False:
            return res
        content, _ = res
        return content

    def _download_xml(self, url_or_request, video_id,
                      note=u'Downloading XML', errnote=u'Unable to download XML',
                      transform_source=None, fatal=True):
        """Return the xml as an xml.etree.ElementTree.Element

        transform_source, if given, is applied to the downloaded text before
        parsing. Returns False when the download failed and was not fatal.
        """
        xml_string = self._download_webpage(
            url_or_request, video_id, note, errnote, fatal=fatal)
        if xml_string is False:
            return xml_string
        if transform_source:
            xml_string = transform_source(xml_string)
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))

    def _download_json(self, url_or_request, video_id,
                       note=u'Downloading JSON metadata',
                       errnote=u'Unable to download JSON metadata',
                       transform_source=None, fatal=True):
        """Download a page and return it parsed as JSON.

        transform_source, if given, is applied to the downloaded text before
        parsing. fatal only governs the download step (as in _download_xml):
        when the download fails without raising, False is returned.
        Raises ExtractorError if the text is not valid JSON.
        """
        json_string = self._download_webpage(
            url_or_request, video_id, note, errnote, fatal=fatal)
        if json_string is False:
            # Do not feed the failure marker to transform_source/json.loads.
            return json_string
        if transform_source:
            json_string = transform_source(json_string)
        try:
            return json.loads(json_string)
        except ValueError as ve:
            raise ExtractorError('Failed to download JSON', cause=ve)

    def report_warning(self, msg, video_id=None):
        """Report a warning prefixed with '[ie_name]' and, if given, the video id."""
        idstr = u'' if video_id is None else u'%s: ' % video_id
        self._downloader.report_warning(
            u'[%s] %s%s' % (self.IE_NAME, idstr, msg))

    def to_screen(self, msg):
        """Print msg to screen, prefixing it with '[ie_name]'"""
        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))

    def report_extraction(self, id_or_name):
        """Report information extraction."""
        self.to_screen(u'%s: Extracting information' % id_or_name)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self.to_screen(u'%s: Downloading webpage' % video_id)

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self.to_screen(u'Confirming age')

    def report_login(self):
        """Report attempt to log in."""
        self.to_screen(u'Logging in')

    # Methods for following #608
    @staticmethod
    def url_result(url, ie=None, video_id=None):
        """Returns a url that points to a page that should be processed"""
        # TODO: ie should be the class used for getting the info
        video_info = {'_type': 'url',
                      'url': url,
                      'ie_key': ie}
        if video_id is not None:
            video_info['id'] = video_id
        return video_info

    @staticmethod
    def playlist_result(entries, playlist_id=None, playlist_title=None):
        """Returns a playlist"""
        video_info = {'_type': 'playlist',
                      'entries': entries}
        if playlist_id:
            video_info['id'] = playlist_id
        if playlist_title:
            video_info['title'] = playlist_title
        return video_info

    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Perform a regex search on the given string, using a single or a list of
        patterns returning the first matching group.
        In case of failure return a default value or raise a WARNING or a
        RegexNotFoundError, depending on fatal, specifying the field name.
        """
        # Start from "no match" so an empty pattern list falls through to the
        # default/fatal handling instead of leaving mobj unbound.
        mobj = None
        if isinstance(pattern, (str, compat_str, compiled_regex_type)):
            mobj = re.search(pattern, string, flags)
        else:
            for p in pattern:
                mobj = re.search(p, string, flags)
                if mobj:
                    break

        # Colour the field name on terminals, except on Windows.
        if os.name != 'nt' and sys.stderr.isatty():
            _name = u'\033[0;34m%s\033[0m' % name
        else:
            _name = name

        if mobj:
            # return the first matching group
            return next(g for g in mobj.groups() if g is not None)
        elif default is not _NO_DEFAULT:
            return default
        elif fatal:
            raise RegexNotFoundError(u'Unable to extract %s' % _name)
        else:
            self._downloader.report_warning(u'unable to extract %s; '
                u'please report this issue on http://yt-dl.org/bug' % _name)
            return None

    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
        """
        Like _search_regex, but strips HTML tags and unescapes entities.
        """
        res = self._search_regex(pattern, string, name, default, fatal, flags)
        if res:
            return clean_html(res).strip()
        else:
            return res

    def _get_login_info(self):
        """
        Get the login info as (username, password)
        It will look in the netrc file using the _NETRC_MACHINE value
        If there's no info available, return (None, None)
        """
        if self._downloader is None:
            return (None, None)

        username = None
        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    username = info[0]
                    password = info[2]
                else:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError) as err:
                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))

        return (username, password)

    # Helper functions for extracting OpenGraph info
    @staticmethod
    def _og_regexes(prop):
        """Return two regexes matching the og:<prop> <meta> tag.

        The first expects the property attribute before the content
        attribute, the second the reverse order.
        """
        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
        property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
        template = r'<meta[^>]+?%s[^>]+?%s'
        return [
            template % (property_re, content_re),
            template % (content_re, property_re),
        ]

    def _og_search_property(self, prop, html, name=None, **kargs):
        """Return the unescaped content of the og:<prop> meta tag.

        Extra keyword arguments are passed on to _search_regex. Returns None
        when the search itself yields None.
        """
        if name is None:
            name = 'OpenGraph %s' % prop
        escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
        if escaped is None:
            return None
        return unescapeHTML(escaped)

    def _og_search_thumbnail(self, html, **kargs):
        """Non-fatal search for og:image."""
        return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)

    def _og_search_description(self, html, **kargs):
        """Non-fatal search for og:description."""
        return self._og_search_property('description', html, fatal=False, **kargs)

    def _og_search_title(self, html, **kargs):
        """Search for og:title."""
        return self._og_search_property('title', html, **kargs)

    def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
        """Search for og:video; with secure=True og:video:secure_url is tried first."""
        regexes = self._og_regexes('video')
        if secure:
            regexes = self._og_regexes('video:secure_url') + regexes
        return self._html_search_regex(regexes, html, name, **kargs)

    def _html_search_meta(self, name, html, display_name=None, fatal=False):
        """Return the content of the <meta> tag whose itemprop, name or
        property attribute equals name."""
        if display_name is None:
            display_name = name
        return self._html_search_regex(
            r'''(?ix)<meta
                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
            html, display_name, fatal=fatal)

    def _dc_search_uploader(self, html):
        """Return the uploader from the dc.creator meta tag."""
        return self._html_search_meta('dc.creator', html, 'uploader')

    def _rta_search(self, html):
        """Return 18 if the page carries the RTA label meta tag, else 0."""
        # See http://www.rtalabel.org/index.php?content=howtofaq#single
        if re.search(r'(?ix)<meta\s+name="rating"\s+'
                     r'     content="RTA-5042-1996-1400-1577-RTA"',
                     html):
            return 18
        return 0

    def _media_rating_search(self, html):
        """Map the "rating" meta tag to an age limit; None if absent or unknown."""
        # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
        rating = self._html_search_meta('rating', html)

        if not rating:
            return None

        RATING_TABLE = {
            'safe for kids': 0,
            'general': 8,
            '14 years': 14,
            'mature': 17,
            'restricted': 19,
        }
        return RATING_TABLE.get(rating.lower(), None)

    def _twitter_search_player(self, html):
        """Return the content of the twitter:player meta tag."""
        return self._html_search_meta('twitter:player', html,
                                      'twitter card player')

    def _sort_formats(self, formats):
        """Sort formats in place, from worst to best.

        Raises ExtractorError if formats is empty. A format without "ext" gets
        it filled in from its "url" when one is present.
        """
        if not formats:
            raise ExtractorError(u'No video formats found')

        # Extension rankings; a higher index means a better format.
        if self._downloader.params.get('prefer_free_formats'):
            audio_order = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
            video_order = [u'flv', u'mp4', u'webm']
        else:
            audio_order = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
            video_order = [u'webm', u'flv', u'mp4']

        def _rank(order, ext):
            # Unknown (or missing) extensions rank below every known one.
            try:
                return order.index(ext)
            except ValueError:
                return -1

        def _known(value, fallback=-1):
            # None cannot be ordered against numbers/strings on Python 3.
            return fallback if value is None else value

        def _formats_key(f):
            # TODO remove the following workaround
            if not f.get('ext') and 'url' in f:
                from ..utils import determine_ext
                f['ext'] = determine_ext(f['url'])
            ext = f.get('ext')

            preference = f.get('preference')
            if preference is None:
                proto = f.get('protocol')
                if proto is None:
                    proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme

                preference = 0 if proto in ['http', 'https'] else -0.1
                if ext in ['f4f', 'f4m']:  # Not yet supported
                    preference -= 0.5

            if f.get('vcodec') == 'none':  # audio only
                ext_preference = 0
                audio_ext_preference = _rank(audio_order, ext)
            else:
                ext_preference = _rank(video_order, ext)
                audio_ext_preference = 0

            return (
                preference,
                _known(f.get('quality')),
                _known(f.get('height')),
                _known(f.get('width')),
                ext_preference,
                _known(f.get('tbr')),
                _known(f.get('vbr')),
                _known(f.get('abr')),
                audio_ext_preference,
                _known(f.get('filesize')),
                _known(f.get('format_id'), u''),
            )
        formats.sort(key=_formats_key)

    def http_scheme(self):
        """ Either "http:" or "https:", depending on the user's preferences """
        return (
            'http:'
            if self._downloader.params.get('prefer_insecure', False)
            else 'https:')
8dbe9899 558
d6983cb4
PH
class SearchInfoExtractor(InfoExtractor):
    """
    Base class for extractors that handle paged search queries.
    Queries have the format _SEARCH_KEY(|all|[0-9]):{query}, where the
    optional prefix selects how many results are wanted.
    Subclasses should define _SEARCH_KEY and _MAX_RESULTS.
    """

    @classmethod
    def _make_valid_url(cls):
        """Return the regexp source that matches this extractor's queries."""
        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

    @classmethod
    def suitable(cls, url):
        """Return True if url is a search query for this extractor."""
        matched = re.match(cls._make_valid_url(), url)
        return matched is not None

    def _real_extract(self, query):
        """Parse the search query and fetch the requested number of results."""
        parsed = re.match(self._make_valid_url(), query)
        if parsed is None:
            raise ExtractorError(u'Invalid search query "%s"' % query)

        count_spec = parsed.group('prefix')
        query = parsed.group('query')

        # No prefix asks for a single result, "all" for as many as allowed.
        if count_spec == '':
            return self._get_n_results(query, 1)
        if count_spec == 'all':
            return self._get_n_results(query, self._MAX_RESULTS)

        wanted = int(count_spec)
        if wanted <= 0:
            raise ExtractorError(u'invalid download number %s for query "%s"' % (wanted, query))
        if wanted > self._MAX_RESULTS:
            # Clamp to the maximum and tell the user about it.
            self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, wanted))
            wanted = self._MAX_RESULTS
        return self._get_n_results(query, wanted)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by subclasses")

    @property
    def SEARCH_KEY(self):
        """Read-only access to _SEARCH_KEY."""
        return self._SEARCH_KEY
20991253 601