]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
[9gag] Add extractor
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import json
9 import os
10 import platform
11 import re
12 import shutil
13 import subprocess
14 import socket
15 import sys
16 import time
17 import traceback
18
19 if os.name == 'nt':
20 import ctypes
21
22 from .utils import (
23 compat_cookiejar,
24 compat_http_client,
25 compat_print,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 locked_file,
38 make_HTTPS_handler,
39 MaxDownloadsReached,
40 PostProcessingError,
41 platform_name,
42 preferredencoding,
43 SameFileError,
44 sanitize_filename,
45 subtitles_filename,
46 takewhile_inclusive,
47 UnavailableVideoError,
48 write_json_file,
49 write_string,
50 YoutubeDLHandler,
51 )
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
55
56
57 class YoutubeDL(object):
58 """YoutubeDL class.
59
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
66
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object hands it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL processes the extracted information, possibly using a File
73 Downloader to download the video.
74
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
81
82 Available options:
83
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
131 again.
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
135 socket_timeout: Time to wait for unresponsive hosts, in seconds
136
137 The following parameters are not used by YoutubeDL itself, they are used by
138 the FileDownloader:
139 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
140 noresizebuffer, retries, continuedl, noprogress, consoletitle
141 """
142
143 params = None
144 _ies = []
145 _pps = []
146 _download_retcode = None
147 _num_downloads = None
148 _screen_file = None
149
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    None is treated as an empty dictionary.
    """
    # Resolve the options first: everything below (including
    # report_warning) reads self.params, and the raw argument may be None.
    self.params = {} if params is None else params
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # The flag indexes the list: falsy -> stdout, truthy -> stderr
    self._screen_file = [sys.stdout, sys.stderr][bool(self.params.get('logtostderr', False))]

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
177
178 def add_info_extractor(self, ie):
179 """Add an InfoExtractor object to the end of the list."""
180 self._ies.append(ie)
181 self._ies_instances[ie.ie_key()] = ie
182 ie.set_downloader(self)
183
184 def get_info_extractor(self, ie_key):
185 """
186 Get an instance of an IE with name ie_key, it will try to get one from
187 the _ies list, if there's no instance it will create a new one and add
188 it to the extractor list.
189 """
190 ie = self._ies_instances.get(ie_key)
191 if ie is None:
192 ie = get_info_extractor(ie_key)()
193 self.add_info_extractor(ie)
194 return ie
195
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order,
    through add_info_extractor.
    """
    for default_ie in gen_extractors():
        self.add_info_extractor(default_ie)
202
203 def add_post_processor(self, pp):
204 """Add a PostProcessor object to the end of the chain."""
205 self._pps.append(pp)
206 pp.set_downloader(self)
207
208 def to_screen(self, message, skip_eol=False):
209 """Print message to stdout if not in quiet mode."""
210 if self.params.get('logger'):
211 self.params['logger'].debug(message)
212 elif not self.params.get('quiet', False):
213 terminator = [u'\n', u''][skip_eol]
214 output = message + terminator
215 write_string(output, self._screen_file)
216
217 def to_stderr(self, message):
218 """Print message to stderr."""
219 assert type(message) == type(u'')
220 if self.params.get('logger'):
221 self.params['logger'].error(message)
222 else:
223 output = message + u'\n'
224 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
225 output = output.encode(preferredencoding())
226 sys.stderr.write(output)
227
228 def to_console_title(self, message):
229 if not self.params.get('consoletitle', False):
230 return
231 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
232 # c_wchar_p() might not be necessary if `message` is
233 # already of type unicode()
234 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
235 elif 'TERM' in os.environ:
236 write_string(u'\033]0;%s\007' % message, self._screen_file)
237
238 def save_console_title(self):
239 if not self.params.get('consoletitle', False):
240 return
241 if 'TERM' in os.environ:
242 # Save the title on stack
243 write_string(u'\033[22;0t', self._screen_file)
244
245 def restore_console_title(self):
246 if not self.params.get('consoletitle', False):
247 return
248 if 'TERM' in os.environ:
249 # Restore the title from stack
250 write_string(u'\033[23;0t', self._screen_file)
251
252 def __enter__(self):
253 self.save_console_title()
254 return self
255
256 def __exit__(self, *args):
257 self.restore_console_title()
258
259 if self.params.get('cookiefile') is not None:
260 self.cookiejar.save()
261
262 def trouble(self, message=None, tb=None):
263 """Determine action to take when a download problem appears.
264
265 Depending on if the downloader has been configured to ignore
266 download errors or not, this method may throw an exception or
267 not when errors are found, after printing the message.
268
269 tb, if given, is additional traceback information.
270 """
271 if message is not None:
272 self.to_stderr(message)
273 if self.params.get('verbose'):
274 if tb is None:
275 if sys.exc_info()[0]: # if .trouble has been called from an except block
276 tb = u''
277 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
278 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
279 tb += compat_str(traceback.format_exc())
280 else:
281 tb_data = traceback.format_list(traceback.extract_stack())
282 tb = u''.join(tb_data)
283 self.to_stderr(tb)
284 if not self.params.get('ignoreerrors', False):
285 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
286 exc_info = sys.exc_info()[1].exc_info
287 else:
288 exc_info = sys.exc_info()
289 raise DownloadError(message, exc_info)
290 self._download_retcode = 1
291
292 def report_warning(self, message):
293 '''
294 Print the message to stderr, it will be prefixed with 'WARNING:'
295 If stderr is a tty file the 'WARNING:' will be colored
296 '''
297 if sys.stderr.isatty() and os.name != 'nt':
298 _msg_header = u'\033[0;33mWARNING:\033[0m'
299 else:
300 _msg_header = u'WARNING:'
301 warning_message = u'%s %s' % (_msg_header, message)
302 self.to_stderr(warning_message)
303
304 def report_error(self, message, tb=None):
305 '''
306 Do the same as trouble, but prefixes the message with 'ERROR:', colored
307 in red if stderr is a tty file.
308 '''
309 if sys.stderr.isatty() and os.name != 'nt':
310 _msg_header = u'\033[0;31mERROR:\033[0m'
311 else:
312 _msg_header = u'ERROR:'
313 error_message = u'%s %s' % (_msg_header, message)
314 self.trouble(error_message, tb)
315
316 def report_writedescription(self, descfn):
317 """ Report that the description file is being written """
318 self.to_screen(u'[info] Writing video description to: ' + descfn)
319
320 def report_writesubtitles(self, sub_filename):
321 """ Report that the subtitles file is being written """
322 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
323
324 def report_writeinfojson(self, infofn):
325 """ Report that the metadata file has been written """
326 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
327
328 def report_writeannotations(self, annofn):
329 """ Report that the annotations file has been written. """
330 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
331
332 def report_file_already_downloaded(self, file_name):
333 """Report file has already been fully downloaded."""
334 try:
335 self.to_screen(u'[download] %s has already been downloaded' % file_name)
336 except UnicodeEncodeError:
337 self.to_screen(u'[download] The file has already been downloaded')
338
339 def increment_downloads(self):
340 """Increment the ordinal that assigns a number to each file."""
341 self._num_downloads += 1
342
def prepare_filename(self, info_dict):
    """Build the output filename from the 'outtmpl' option.

    The template is filled with a sanitized copy of info_dict extended
    with 'epoch' and 'autonumber'. Returns the filename, or None after
    reporting an error when the template cannot be expanded.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def _clean(key, value):
            # Missing values are rendered as 'NA'
            text = u'NA' if value is None else compat_str(value)
            return sanitize_filename(
                text, restricted=restricted, is_id=(key == u'id'))

        sanitized = {}
        for key, value in fields.items():
            sanitized[key] = _clean(key, value)

        return os.path.expanduser(self.params['outtmpl']) % sanitized
    except KeyError as err:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
373
374 def _match_entry(self, info_dict):
375 """ Returns None iff the file should be downloaded """
376
377 if 'title' in info_dict:
378 # This can happen when we're just evaluating the playlist
379 title = info_dict['title']
380 matchtitle = self.params.get('matchtitle', False)
381 if matchtitle:
382 if not re.search(matchtitle, title, re.IGNORECASE):
383 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
384 rejecttitle = self.params.get('rejecttitle', False)
385 if rejecttitle:
386 if re.search(rejecttitle, title, re.IGNORECASE):
387 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
388 date = info_dict.get('upload_date', None)
389 if date is not None:
390 dateRange = self.params.get('daterange', DateRange())
391 if date not in dateRange:
392 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
393 age_limit = self.params.get('age_limit')
394 if age_limit is not None:
395 if age_limit < info_dict.get('age_limit', 0):
396 return u'Skipping "' + title + '" because it is age restricted'
397 if self.in_download_archive(info_dict):
398 return (u'%s has already been recorded in archive'
399 % info_dict.get('title', info_dict.get('id', u'video')))
400 return None
401
402 @staticmethod
403 def add_extra_info(info_dict, extra_info):
404 '''Set the keys from extra_info in info dict if they are missing'''
405 for key, value in extra_info.items():
406 info_dict.setdefault(key, value)
407
408 def extract_info(self, url, download=True, ie_key=None, extra_info={},
409 process=True):
410 '''
411 Returns a list with a dictionary for each video we find.
412 If 'download', also downloads the videos.
413 extra_info is a dict containing the extra values to add to each result
414 '''
415
416 if ie_key:
417 ies = [self.get_info_extractor(ie_key)]
418 else:
419 ies = self._ies
420
421 for ie in ies:
422 if not ie.suitable(url):
423 continue
424
425 if not ie.working():
426 self.report_warning(u'The program functionality for this site has been marked as broken, '
427 u'and will probably not work.')
428
429 try:
430 ie_result = ie.extract(url)
431 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
432 break
433 if isinstance(ie_result, list):
434 # Backwards compatibility: old IE result format
435 ie_result = {
436 '_type': 'compat_list',
437 'entries': ie_result,
438 }
439 self.add_extra_info(ie_result,
440 {
441 'extractor': ie.IE_NAME,
442 'webpage_url': url,
443 'extractor_key': ie.ie_key(),
444 })
445 if process:
446 return self.process_ie_result(ie_result, download, extra_info)
447 else:
448 return ie_result
449 except ExtractorError as de: # An error we somewhat expected
450 self.report_error(compat_str(de), de.format_traceback())
451 break
452 except Exception as e:
453 if self.params.get('ignoreerrors', False):
454 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
455 break
456 else:
457 raise
458 else:
459 self.report_error(u'no suitable InfoExtractor: %s' % url)
460
461 def process_ie_result(self, ie_result, download=True, extra_info={}):
462 """
463 Take the result of the ie(may be modified) and resolve all unresolved
464 references (URLs, playlist items).
465
466 It will also download the videos if 'download'.
467 Returns the resolved ie_result.
468 """
469
470 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
471 if result_type == 'video':
472 self.add_extra_info(ie_result, extra_info)
473 return self.process_video_result(ie_result, download=download)
474 elif result_type == 'url':
475 # We have to add extra_info to the results because it may be
476 # contained in a playlist
477 return self.extract_info(ie_result['url'],
478 download,
479 ie_key=ie_result.get('ie_key'),
480 extra_info=extra_info)
481 elif result_type == 'url_transparent':
482 # Use the information from the embedding page
483 info = self.extract_info(
484 ie_result['url'], ie_key=ie_result.get('ie_key'),
485 extra_info=extra_info, download=False, process=False)
486
487 def make_result(embedded_info):
488 new_result = ie_result.copy()
489 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
490 'entries', 'urlhandle', 'ie_key', 'duration',
491 'subtitles', 'annotations', 'format'):
492 if f in new_result:
493 del new_result[f]
494 if f in embedded_info:
495 new_result[f] = embedded_info[f]
496 return new_result
497 new_result = make_result(info)
498
499 assert new_result.get('_type') != 'url_transparent'
500 if new_result.get('_type') == 'compat_list':
501 new_result['entries'] = [
502 make_result(e) for e in new_result['entries']]
503
504 return self.process_ie_result(
505 new_result, download=download, extra_info=extra_info)
506 elif result_type == 'playlist':
507 # We process each entry in the playlist
508 playlist = ie_result.get('title', None) or ie_result.get('id', None)
509 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
510
511 playlist_results = []
512
513 n_all_entries = len(ie_result['entries'])
514 playliststart = self.params.get('playliststart', 1) - 1
515 playlistend = self.params.get('playlistend', -1)
516
517 if playlistend == -1:
518 entries = ie_result['entries'][playliststart:]
519 else:
520 entries = ie_result['entries'][playliststart:playlistend]
521
522 n_entries = len(entries)
523
524 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
525 (ie_result['extractor'], playlist, n_all_entries, n_entries))
526
527 for i, entry in enumerate(entries, 1):
528 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
529 extra = {
530 'playlist': playlist,
531 'playlist_index': i + playliststart,
532 'extractor': ie_result['extractor'],
533 'webpage_url': ie_result['webpage_url'],
534 'extractor_key': ie_result['extractor_key'],
535 }
536
537 reason = self._match_entry(entry)
538 if reason is not None:
539 self.to_screen(u'[download] ' + reason)
540 continue
541
542 entry_result = self.process_ie_result(entry,
543 download=download,
544 extra_info=extra)
545 playlist_results.append(entry_result)
546 ie_result['entries'] = playlist_results
547 return ie_result
548 elif result_type == 'compat_list':
549 def _fixup(r):
550 self.add_extra_info(r,
551 {
552 'extractor': ie_result['extractor'],
553 'webpage_url': ie_result['webpage_url'],
554 'extractor_key': ie_result['extractor_key'],
555 })
556 return r
557 ie_result['entries'] = [
558 self.process_ie_result(_fixup(r), download, extra_info)
559 for r in ie_result['entries']
560 ]
561 return ie_result
562 else:
563 raise Exception('Invalid result type: %s' % result_type)
564
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats.

    'best' (or None) takes the last entry and 'worst' the first. Any
    other spec is matched against 'ext' when it is one of the known
    extensions, otherwise against 'format_id'; the last match wins.
    Returns None when nothing matches.
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    known_extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in known_extensions else 'format_id'
    chosen = None
    for candidate in available_formats:
        if candidate[field] == format_spec:
            chosen = candidate
    return chosen
580
581 def process_video_result(self, info_dict, download=True):
582 assert info_dict.get('_type', 'video') == 'video'
583
584 if 'playlist' not in info_dict:
585 # It isn't part of a playlist
586 info_dict['playlist'] = None
587 info_dict['playlist_index'] = None
588
589 # This extractors handle format selection themselves
590 if info_dict['extractor'] in [u'youtube', u'Youku']:
591 if download:
592 self.process_info(info_dict)
593 return info_dict
594
595 # We now pick which formats have to be downloaded
596 if info_dict.get('formats') is None:
597 # There's only one format available
598 formats = [info_dict]
599 else:
600 formats = info_dict['formats']
601
602 # We check that all the formats have the format and format_id fields
603 for (i, format) in enumerate(formats):
604 if format.get('format_id') is None:
605 format['format_id'] = compat_str(i)
606 if format.get('format') is None:
607 format['format'] = u'{id} - {res}{note}'.format(
608 id=format['format_id'],
609 res=self.format_resolution(format),
610 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
611 )
612 # Automatically determine file extension if missing
613 if 'ext' not in format:
614 format['ext'] = determine_ext(format['url'])
615
616 if self.params.get('listformats', None):
617 self.list_formats(info_dict)
618 return
619
620 format_limit = self.params.get('format_limit', None)
621 if format_limit:
622 formats = list(takewhile_inclusive(
623 lambda f: f['format_id'] != format_limit, formats
624 ))
625 if self.params.get('prefer_free_formats'):
626 def _free_formats_key(f):
627 try:
628 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
629 except ValueError:
630 ext_ord = -1
631 # We only compare the extension if they have the same height and width
632 return (f.get('height'), f.get('width'), ext_ord)
633 formats = sorted(formats, key=_free_formats_key)
634
635 req_format = self.params.get('format', 'best')
636 if req_format is None:
637 req_format = 'best'
638 formats_to_download = []
639 # The -1 is for supporting YoutubeIE
640 if req_format in ('-1', 'all'):
641 formats_to_download = formats
642 else:
643 # We can accept formats requestd in the format: 34/5/best, we pick
644 # the first that is available, starting from left
645 req_formats = req_format.split('/')
646 for rf in req_formats:
647 selected_format = self.select_format(rf, formats)
648 if selected_format is not None:
649 formats_to_download = [selected_format]
650 break
651 if not formats_to_download:
652 raise ExtractorError(u'requested format not available',
653 expected=True)
654
655 if download:
656 if len(formats_to_download) > 1:
657 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
658 for format in formats_to_download:
659 new_info = dict(info_dict)
660 new_info.update(format)
661 self.process_info(new_info)
662 # We update the info dict with the best quality format (backwards compatibility)
663 info_dict.update(formats_to_download[-1])
664 return info_dict
665
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the title/date/age/archive filters, performs the forced
    printings, writes the requested side files (description,
    annotations, subtitles, info JSON, thumbnail), downloads the video
    through the FileDownloader, runs the postprocessors and records the
    video in the download archive.

    Returns None. Raises MaxDownloadsReached when the download counter
    exceeds the 'max_downloads' option and UnavailableVideoError when
    the download fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed outside the try so the error handler can name it
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Report the subtitles file itself; the description file
                # name may not even be defined at this point.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the dumped metadata
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    # Encode the name like every other file written here
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Do not leak the HTTP connection
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
823
824 def download(self, url_list):
825 """Download a given list of URLs."""
826 if (len(url_list) > 1 and
827 '%' not in self.params['outtmpl']
828 and self.params.get('max_downloads') != 1):
829 raise SameFileError(self.params['outtmpl'])
830
831 for url in url_list:
832 try:
833 #It also downloads the videos
834 self.extract_info(url)
835 except UnavailableVideoError:
836 self.report_error(u'unable to download video')
837 except MaxDownloadsReached:
838 self.to_screen(u'[info] Maximum number of downloaded files reached.')
839 raise
840
841 return self._download_retcode
842
843 def post_process(self, filename, ie_info):
844 """Run all the postprocessors on the given file."""
845 info = dict(ie_info)
846 info['filepath'] = filename
847 keep_video = None
848 for pp in self._pps:
849 try:
850 keep_video_wish, new_info = pp.run(info)
851 if keep_video_wish is not None:
852 if keep_video_wish:
853 keep_video = keep_video_wish
854 elif keep_video is None:
855 # No clear decision yet, let IE decide
856 keep_video = keep_video_wish
857 except PostProcessingError as e:
858 self.report_error(e.msg)
859 if keep_video is False and not self.params.get('keepvideo', False):
860 try:
861 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
862 os.remove(encodeFilename(filename))
863 except (IOError, OSError):
864 self.report_warning(u'Unable to remove downloaded video file')
865
def _make_archive_id(self, info_dict):
    """Return the download-archive entry for info_dict, or None.

    The entry is the lower-cased extractor key, a space, and the video
    id.  The extractor key is read from 'extractor_key', falling back to
    'ie_key' (the key used for playlist entries).  None is returned when
    either the extractor key or the video id is unavailable, i.e. when
    the video information is incomplete.
    """
    video_id = info_dict.get('id')
    if video_id is None:
        return None  # Incomplete video information

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + video_id
876
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is already archived.

    Returns False when no 'download_archive' file is configured, when no
    archive id can be built for info_dict, or when the archive file does
    not exist yet.  Any other IOError while reading it is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            # One entry per line; surrounding whitespace is ignored
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far
        if ioe.errno != errno.ENOENT:
            raise
    return False
895
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when no 'download_archive' file is configured.  The
    archive id must be available (asserted).
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + u'\n')
904
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution label for a format dictionary.

    In order of precedence: 'audio only' when vcodec is 'none', the
    explicit '_resolution' value, 'WIDTHxHEIGHT' when both dimensions
    are known, 'HEIGHTp' when only the height is known, and finally
    default.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    explicit = format.get('_resolution')
    if explicit is not None:
        return explicit

    height = format.get('height')
    if height is None:
        # A width on its own is not enough to describe the resolution
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
919
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are taken from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format.  Each row
    shows the format id, extension, resolution and a free-text note.
    With more than one format, the first row is tagged '(worst)' and the
    last one '(best)'.  The table is written with to_screen().

    An empty 'formats' list yields a table with only the header row.
    """
    def format_note(fdict):
        # Assemble the "note" column: optional free-text note, then
        # video codec/bitrate, audio codec/bitrate and file size, the
        # groups being separated by ', '.
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # One table row; the id column is padded to idlen + 1 characters
        return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    # Seed the list with the header width so that max() never sees an
    # empty sequence, even when there are no formats at all.
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
969
def urlopen(self, req):
    """ Start an HTTP download

    req is passed unchanged to the open() method of self._opener, and
    whatever that call returns is handed back to the caller.
    """
    response = self._opener.open(req)
    return response
973
def print_debug_header(self):
    """Write version and environment details when 'verbose' is set.

    Emits, through write_string(): the youtube-dl version, the abbreviated
    git HEAD commit of the directory containing this file (only when it
    can be determined), the Python version and platform, and the proxy
    map collected from the handlers of self._opener.  Does nothing when
    the 'verbose' parameter is not set.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        # Only accept output that consists entirely of hex digits
        if re.match(r'[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout.  KeyboardInterrupt/SystemExit are left to propagate.
        try:
            # Only exists on Python 2
            sys.exc_clear()
        except AttributeError:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1000
def _setup_opener(self):
    """Build the URL opener used for requests and install it globally.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' parameters.  Sets self.cookiejar and
    self._opener, installs the opener as the default one of
    compat_urllib_request and sets the default socket timeout
    (600 when 'socket_timeout' is not given).
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        timeout = 600
    else:
        timeout = float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load the file when it is readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        # No explicit setting: use the proxies reported by getproxies()
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty string results in an empty proxy map
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)