]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Print a message before embedding the subtitles
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
6import io
7import os
8import re
9import shutil
10import socket
11import sys
12import time
13import traceback
14
15from .utils import *
023fa8c4 16from .extractor import get_info_extractor, gen_extractors
8222d8de
JMF
17from .FileDownloader import FileDownloader
18
19
20class YoutubeDL(object):
21 """YoutubeDL class.
22
23 YoutubeDL objects are the ones responsible for downloading the
24 actual video file and writing it to disk if the user has requested
25 it, among some other tasks. In most cases there should be one per
26 program. As, given a video URL, the downloader doesn't know how to
27 extract all the needed information, task that InfoExtractors do, it
28 has to pass the URL to one of them.
29
30 For this, YoutubeDL objects have a method that allows
31 InfoExtractors to be registered in a given order. When it is passed
32 a URL, the YoutubeDL object hands it to the first InfoExtractor it
33 finds that reports being able to handle it. The InfoExtractor extracts
34 all the information about the video or videos the URL refers to, and
35 YoutubeDL processes the extracted information, possibly using a File
36 Downloader to download the video.
37
38 YoutubeDL objects accept a lot of parameters. In order not to saturate
39 the object constructor with arguments, it receives a dictionary of
40 options instead. These options are available through the params
41 attribute for the InfoExtractors to use. The YoutubeDL also
42 registers itself as the downloader in charge for the InfoExtractors
43 that are added to it, so this is a "mutual registration".
44
45 Available options:
46
47 username: Username for authentication purposes.
48 password: Password for authentication purposes.
c6c19746 49 videopassword: Password for accessing a video.
8222d8de
JMF
50 usenetrc: Use netrc for authentication instead.
51 verbose: Print additional info to stdout.
52 quiet: Do not print messages to stdout.
53 forceurl: Force printing final URL.
54 forcetitle: Force printing title.
55 forceid: Force printing ID.
56 forcethumbnail: Force printing thumbnail URL.
57 forcedescription: Force printing description.
58 forcefilename: Force printing final filename.
59 simulate: Do not download the video files.
60 format: Video format code.
61 format_limit: Highest quality format to try.
62 outtmpl: Template for output names.
63 restrictfilenames: Do not allow "&" and spaces in file names
64 ignoreerrors: Do not stop on download errors.
65 nooverwrites: Prevent overwriting files.
66 playliststart: Playlist item to start at.
67 playlistend: Playlist item to end at.
68 matchtitle: Download only matching titles.
69 rejecttitle: Reject downloads for matching titles.
70 logtostderr: Log messages to stderr instead of stdout.
71 writedescription: Write the video description to a .description file
72 writeinfojson: Write the video metadata to a .info.json file
73 writethumbnail: Write the thumbnail image to a file
74 writesubtitles: Write the video subtitles to a file
b004821f 75 writeautomaticsub: Write the automatic subtitles to a file
8222d8de
JMF
76 allsubtitles: Downloads all the subtitles of the video
77 listsubtitles: Lists all available subtitles for the video
b98a6b2f 78 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
8222d8de
JMF
79 subtitleslang: Language of the subtitles to download
80 keepvideo: Keep the video file after post-processing
81 daterange: A DateRange object, download only if the upload_date is in the range.
82 skip_download: Skip the actual download of the video file
83
84 The following parameters are not used by YoutubeDL itself, they are used by
85 the FileDownloader:
86 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
87 noresizebuffer, retries, continuedl, noprogress, consoletitle
88 """
89
90 params = None
91 _ies = []
92 _pps = []
93 _download_retcode = None
94 _num_downloads = None
95 _screen_file = None
96
def __init__(self, params):
    """Set up a YoutubeDL object configured by the ``params`` dictionary.

    Resets the extractor and post-processor lists, the progress hooks and
    the download counters, selects the stream used for screen output from
    the 'logtostderr' option, creates the FileDownloader and warns when
    the output template still contains the deprecated %(stitle)s field.
    """
    self._download_retcode = 0
    self._num_downloads = 0
    self._progress_hooks = []
    self._pps = []
    self._ies = []
    # Indexing with the option value selects stdout (False) or stderr (True).
    candidate_streams = [sys.stdout, sys.stderr]
    self._screen_file = candidate_streams[params.get('logtostderr', False)]
    self.params = params
    self.fd = FileDownloader(self, self.params)

    output_template = self.params['outtmpl']
    if '%(stitle)s' in output_template:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
110
111 def add_info_extractor(self, ie):
112 """Add an InfoExtractor object to the end of the list."""
113 self._ies.append(ie)
114 ie.set_downloader(self)
115
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors, in order,
    after the extractors that were already added.
    """
    for default_ie in gen_extractors():
        self.add_info_extractor(default_ie)
122
8222d8de
JMF
123 def add_post_processor(self, pp):
124 """Add a PostProcessor object to the end of the chain."""
125 self._pps.append(pp)
126 pp.set_downloader(self)
127
128 def to_screen(self, message, skip_eol=False):
129 """Print message to stdout if not in quiet mode."""
130 assert type(message) == type(u'')
131 if not self.params.get('quiet', False):
132 terminator = [u'\n', u''][skip_eol]
133 output = message + terminator
134 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
135 output = output.encode(preferredencoding(), 'ignore')
136 self._screen_file.write(output)
137 self._screen_file.flush()
138
139 def to_stderr(self, message):
140 """Print message to stderr."""
141 assert type(message) == type(u'')
142 output = message + u'\n'
143 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
144 output = output.encode(preferredencoding())
145 sys.stderr.write(output)
146
147 def fixed_template(self):
148 """Checks if the output template is fixed."""
149 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
150
151 def trouble(self, message=None, tb=None):
152 """Determine action to take when a download problem appears.
153
154 Depending on if the downloader has been configured to ignore
155 download errors or not, this method may throw an exception or
156 not when errors are found, after printing the message.
157
158 tb, if given, is additional traceback information.
159 """
160 if message is not None:
161 self.to_stderr(message)
162 if self.params.get('verbose'):
163 if tb is None:
164 if sys.exc_info()[0]: # if .trouble has been called from an except block
165 tb = u''
166 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
167 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
168 tb += compat_str(traceback.format_exc())
169 else:
170 tb_data = traceback.format_list(traceback.extract_stack())
171 tb = u''.join(tb_data)
172 self.to_stderr(tb)
173 if not self.params.get('ignoreerrors', False):
174 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
175 exc_info = sys.exc_info()[1].exc_info
176 else:
177 exc_info = sys.exc_info()
178 raise DownloadError(message, exc_info)
179 self._download_retcode = 1
180
181 def report_warning(self, message):
182 '''
183 Print the message to stderr, it will be prefixed with 'WARNING:'
184 If stderr is a tty file the 'WARNING:' will be colored
185 '''
186 if sys.stderr.isatty() and os.name != 'nt':
187 _msg_header=u'\033[0;33mWARNING:\033[0m'
188 else:
189 _msg_header=u'WARNING:'
190 warning_message=u'%s %s' % (_msg_header,message)
191 self.to_stderr(warning_message)
192
193 def report_error(self, message, tb=None):
194 '''
195 Do the same as trouble, but prefixes the message with 'ERROR:', colored
196 in red if stderr is a tty file.
197 '''
198 if sys.stderr.isatty() and os.name != 'nt':
199 _msg_header = u'\033[0;31mERROR:\033[0m'
200 else:
201 _msg_header = u'ERROR:'
202 error_message = u'%s %s' % (_msg_header, message)
203 self.trouble(error_message, tb)
204
205 def slow_down(self, start_time, byte_counter):
206 """Sleep if the download speed is over the rate limit."""
207 rate_limit = self.params.get('ratelimit', None)
208 if rate_limit is None or byte_counter == 0:
209 return
210 now = time.time()
211 elapsed = now - start_time
212 if elapsed <= 0.0:
213 return
214 speed = float(byte_counter) / elapsed
215 if speed > rate_limit:
216 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
217
218 def report_writedescription(self, descfn):
219 """ Report that the description file is being written """
220 self.to_screen(u'[info] Writing video description to: ' + descfn)
221
222 def report_writesubtitles(self, sub_filename):
223 """ Report that the subtitles file is being written """
224 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
225
226 def report_writeinfojson(self, infofn):
227 """ Report that the metadata file has been written """
228 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
229
230 def report_file_already_downloaded(self, file_name):
231 """Report file has already been fully downloaded."""
232 try:
233 self.to_screen(u'[download] %s has already been downloaded' % file_name)
234 except (UnicodeEncodeError) as err:
235 self.to_screen(u'[download] The file has already been downloaded')
236
237 def increment_downloads(self):
238 """Increment the ordinal that assigns a number to each file."""
239 self._num_downloads += 1
240
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' template with a copy of ``info_dict`` extended
    by 'epoch' (current time) and 'autonumber' (the download counter,
    zero-padded to the 'autonumber_size' option, default 5). A non-None
    'playlist_index' is zero-padded to five digits. Every value is passed
    through sanitize_filename; None values become u'NA'.

    Returns the filename, or None after reporting an error when the
    template cannot be expanded.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # Use .get(): a dict without 'playlist_index' must not be reported
        # as an erroneous template when the template does not even use it.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def sanitize(k, v):
            return sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=restricted,
                is_id=(k == u'id'))

        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        return self.params['outtmpl'] % template_dict
    except KeyError:
        # The template references a field that is not available.
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
269
270 def _match_entry(self, info_dict):
271 """ Returns None iff the file should be downloaded """
272
273 title = info_dict['title']
274 matchtitle = self.params.get('matchtitle', False)
275 if matchtitle:
276 if not re.search(matchtitle, title, re.IGNORECASE):
277 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
278 rejecttitle = self.params.get('rejecttitle', False)
279 if rejecttitle:
280 if re.search(rejecttitle, title, re.IGNORECASE):
281 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
282 date = info_dict.get('upload_date', None)
283 if date is not None:
284 dateRange = self.params.get('daterange', DateRange())
285 if date not in dateRange:
286 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
287 return None
288
289 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
290 '''
291 Returns a list with a dictionary for each video we find.
292 If 'download', also downloads the videos.
293 extra_info is a dict containing the extra values to add to each result
294 '''
295
296 if ie_key:
297 ie = get_info_extractor(ie_key)()
298 ie.set_downloader(self)
299 ies = [ie]
300 else:
301 ies = self._ies
302
303 for ie in ies:
304 if not ie.suitable(url):
305 continue
306
307 if not ie.working():
308 self.report_warning(u'The program functionality for this site has been marked as broken, '
309 u'and will probably not work.')
310
311 try:
312 ie_result = ie.extract(url)
313 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
314 break
315 if isinstance(ie_result, list):
316 # Backwards compatibility: old IE result format
317 for result in ie_result:
318 result.update(extra_info)
319 ie_result = {
320 '_type': 'compat_list',
321 'entries': ie_result,
322 }
323 else:
324 ie_result.update(extra_info)
325 if 'extractor' not in ie_result:
326 ie_result['extractor'] = ie.IE_NAME
327 return self.process_ie_result(ie_result, download=download)
328 except ExtractorError as de: # An error we somewhat expected
329 self.report_error(compat_str(de), de.format_traceback())
330 break
331 except Exception as e:
332 if self.params.get('ignoreerrors', False):
333 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
334 break
335 else:
336 raise
337 else:
338 self.report_error(u'no suitable InfoExtractor: %s' % url)
339
340 def process_ie_result(self, ie_result, download=True, extra_info={}):
341 """
342 Take the result of the ie(may be modified) and resolve all unresolved
343 references (URLs, playlist items).
344
345 It will also download the videos if 'download'.
346 Returns the resolved ie_result.
347 """
348
349 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
350 if result_type == 'video':
a4311547 351 ie_result.update(extra_info)
8222d8de
JMF
352 if 'playlist' not in ie_result:
353 # It isn't part of a playlist
354 ie_result['playlist'] = None
355 ie_result['playlist_index'] = None
356 if download:
357 self.process_info(ie_result)
358 return ie_result
359 elif result_type == 'url':
360 # We have to add extra_info to the results because it may be
361 # contained in a playlist
362 return self.extract_info(ie_result['url'],
363 download,
364 ie_key=ie_result.get('ie_key'),
365 extra_info=extra_info)
366 elif result_type == 'playlist':
367 # We process each entry in the playlist
368 playlist = ie_result.get('title', None) or ie_result.get('id', None)
369 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
370
371 playlist_results = []
372
373 n_all_entries = len(ie_result['entries'])
374 playliststart = self.params.get('playliststart', 1) - 1
375 playlistend = self.params.get('playlistend', -1)
376
377 if playlistend == -1:
378 entries = ie_result['entries'][playliststart:]
379 else:
380 entries = ie_result['entries'][playliststart:playlistend]
381
382 n_entries = len(entries)
383
384 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
385 (ie_result['extractor'], playlist, n_all_entries, n_entries))
386
387 for i,entry in enumerate(entries,1):
388 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
389 extra = {
390 'playlist': playlist,
391 'playlist_index': i + playliststart,
392 }
393 if not 'extractor' in entry:
394 # We set the extractor, if it's an url it will be set then to
395 # the new extractor, but if it's already a video we must make
396 # sure it's present: see issue #877
397 entry['extractor'] = ie_result['extractor']
398 entry_result = self.process_ie_result(entry,
399 download=download,
400 extra_info=extra)
401 playlist_results.append(entry_result)
402 ie_result['entries'] = playlist_results
403 return ie_result
404 elif result_type == 'compat_list':
405 def _fixup(r):
406 r.setdefault('extractor', ie_result['extractor'])
407 return r
408 ie_result['entries'] = [
409 self.process_ie_result(_fixup(r), download=download)
410 for r in ie_result['entries']
411 ]
412 return ie_result
413 else:
414 raise Exception('Invalid result type: %s' % result_type)
415
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Increments the download counter, fills in the derived fields
    ('fulltitle', 'stitle', default 'format'; titles longer than 200
    characters are truncated), skips entries rejected by _match_entry,
    enforces the 'max_downloads' option and performs the forced printings.
    Unless 'simulate' is set it then creates the target directory, writes
    the requested description, subtitles, info JSON and thumbnail files,
    downloads the video unless 'skip_download' is set and runs the
    post-processors on success.

    Raises MaxDownloadsReached when the counter exceeds 'max_downloads'
    and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        compat_print(info_dict['url'])
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        try:
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub'),
                                   self.params.get('allsubtitles', False)])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed before the try so the error message below can always
            # name the subtitles file (it used to name the description file).
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
            try:
                # encodeFilename for consistency with the other file writes.
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
            finally:
                uf.close()
            self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                           (info_dict['extractor'], info_dict['id'], thumb_filename))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file is treated as a successful download.
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return
551
552 def download(self, url_list):
553 """Download a given list of URLs."""
554 if len(url_list) > 1 and self.fixed_template():
555 raise SameFileError(self.params['outtmpl'])
556
557 for url in url_list:
558 try:
559 #It also downloads the videos
560 videos = self.extract_info(url)
561 except UnavailableVideoError:
562 self.report_error(u'unable to download video')
563 except MaxDownloadsReached:
564 self.to_screen(u'[info] Maximum number of downloaded files reached.')
565 raise
566
567 return self._download_retcode
568
569 def post_process(self, filename, ie_info):
570 """Run all the postprocessors on the given file."""
571 info = dict(ie_info)
572 info['filepath'] = filename
573 keep_video = None
574 for pp in self._pps:
575 try:
576 keep_video_wish,new_info = pp.run(info)
577 if keep_video_wish is not None:
578 if keep_video_wish:
579 keep_video = keep_video_wish
580 elif keep_video is None:
581 # No clear decision yet, let IE decide
582 keep_video = keep_video_wish
583 except PostProcessingError as e:
bbcbf4d4 584 self.report_error(e.msg)
8222d8de
JMF
585 if keep_video is False and not self.params.get('keepvideo', False):
586 try:
587 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
588 os.remove(encodeFilename(filename))
589 except (IOError, OSError):
590 self.report_warning(u'Unable to remove downloaded video file')