]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Merge pull request #1438 from rzhxeo/fktv
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import io
7 import os
8 import re
9 import shutil
10 import socket
11 import sys
12 import time
13 import traceback
14
15 from .utils import *
16 from .extractor import get_info_extractor, gen_extractors
17 from .FileDownloader import FileDownloader
18
19
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing format.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    autonumber_size:   Number of digits used for %(autonumber)s (default 5).
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    max_downloads:     Stop once more than this many videos were processed.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Class-level placeholders; every one of them is rebound per instance
    # in __init__, so the mutable lists below are never shared in practice.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a YoutubeDL object with the given options dictionary."""
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Regular messages go to stderr only when 'logtostderr' is set.
        self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
        self.params = params
        self.fd = FileDownloader(self, self.params)

        # Tolerate a missing 'outtmpl' here; the methods that really need the
        # template report the problem themselves.
        if '%(stitle)s' in self.params.get('outtmpl', u''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file if not in quiet mode."""
        if self.params.get('quiet', False):
            return
        terminator = u'' if skip_eol else u'\n'
        write_string(message + terminator, self._screen_file)

    def to_stderr(self, message):
        """Print message (a unicode string) to stderr."""
        assert isinstance(message, type(u''))
        output = message + u'\n'
        # Python 2 lies about the mode of sys.stdout/sys.stderr
        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def fixed_template(self):
        """Checks if the output template is fixed."""
        return re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)

        current = sys.exc_info()
        # Some exceptions carry the exc_info of the error they wrap.
        wrapped = getattr(current[1], 'exc_info', None)
        has_wrapped = bool(current[0]) and bool(wrapped) and bool(wrapped[0])

        if self.params.get('verbose'):
            if tb is None:
                if current[0]:  # if .trouble has been called from an except block
                    tb = u''
                    if has_wrapped:
                        tb += u''.join(traceback.format_exception(*wrapped))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb = u''.join(traceback.format_list(traceback.extract_stack()))
            self.to_stderr(tb)

        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message, wrapped if has_wrapped else current)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;31mERROR:\033[0m'
        else:
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep long enough for the average speed to drop to the limit.
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file has been written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Returns the expanded output template, or None (after reporting an
        error) if the template cannot be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # .get(): an info_dict that never went through process_ie_result
            # has no 'playlist_index' key, which must not be reported as a
            # template error.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            sanitize = lambda k, v: sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == u'id'))
            template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

            return self.params['outtmpl'] % template_dict
        except KeyError:
            self.report_error(u'Erroneous output template')
            return None
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded

        Otherwise returns the reason for skipping it. The reason carries no
        '[download] ' prefix; the caller adds it when printing.
        """
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        return None

    def extract_info(self, url, download=True, ie_key=None, extra_info=None):
        '''
        Extract the information for url with the first suitable InfoExtractor
        and return the processed result (see process_ie_result), or None if
        nothing was extracted or an error was reported.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        if extra_info is None:
            extra_info = {}

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    for result in ie_result:
                        result.update(extra_info)
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                else:
                    ie_result.update(extra_info)
                if 'extractor' not in ie_result:
                    ie_result['extractor'] = ie.IE_NAME
                return self.process_ie_result(ie_result, download=download)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # Only reached when no extractor claimed the URL (no break above).
            self.report_error(u'no suitable InfoExtractor: %s' % url)

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        Raises Exception for an unknown '_type'.
        """
        if extra_info is None:
            extra_info = {}

        # If not given we suppose it's a video, support the default old system
        result_type = ie_result.get('_type', 'video')
        if result_type == 'video':
            ie_result.update(extra_info)
            if 'playlist' not in ie_result:
                # It isn't part of a playlist
                ie_result['playlist'] = None
                ie_result['playlist_index'] = None
            if download:
                self.process_info(ie_result)
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            # 'playliststart' is 1-based; convert it to a slice offset.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                           (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                }
                if 'extractor' not in entry:
                    # We set the extractor, if it's an url it will be set then to
                    # the new extractor, but if it's already a video we must make
                    # sure it's present: see issue #877
                    entry['extractor'] = ie_result['extractor']
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                r.setdefault('extractor', ie_result['extractor'])
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download=download)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Prints the forced fields, writes the requested side files
        (description, subtitles, info JSON, thumbnail), downloads the video
        unless told otherwise and finally runs the post-processors.
        """
        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceid', False):
            compat_print(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            try:
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                try:
                    self.report_writesubtitles(sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
                except (OSError, IOError):
                    # Report the subtitles file itself, not the (possibly
                    # undefined) description file name.
                    self.report_error(u'Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # 'urlhandle' is left out of the JSON dump.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                # splitext() only strips an extension of the last path
                # component and leaves extension-less names intact.
                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self.fd._do_download(filename, info_dict)
                # Network errors must be tested first: URLError and
                # socket.error are IOError/OSError subclasses and would
                # otherwise be swallowed by the generic handler below.
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated download return code. Raises SameFileError
        when several URLs would be written to one fixed output file.
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            try:
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error(u'unable to download video')
            except MaxDownloadsReached:
                self.to_screen(u'[info] Maximum number of downloaded files reached.')
                raise

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                keep_video_wish, _ = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A wish to keep the file always wins.
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, take this post-processor's wish
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')