]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
[ffmpeg] Warn if ffmpeg/avconv version is too old (Fixes #4026)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import json
11 import locale
12 import os
13 import platform
14 import re
15 import shutil
16 import subprocess
17 import socket
18 import sys
19 import time
20 import traceback
21
22 if os.name == 'nt':
23 import ctypes
24
25 from .utils import (
26 compat_cookiejar,
27 compat_expanduser,
28 compat_http_client,
29 compat_str,
30 compat_urllib_error,
31 compat_urllib_request,
32 escape_url,
33 ContentTooShortError,
34 date_from_str,
35 DateRange,
36 DEFAULT_OUTTMPL,
37 determine_ext,
38 DownloadError,
39 encodeFilename,
40 ExtractorError,
41 format_bytes,
42 formatSeconds,
43 get_term_width,
44 locked_file,
45 make_HTTPS_handler,
46 MaxDownloadsReached,
47 PagedList,
48 PostProcessingError,
49 platform_name,
50 preferredencoding,
51 SameFileError,
52 sanitize_filename,
53 subtitles_filename,
54 takewhile_inclusive,
55 UnavailableVideoError,
56 url_basename,
57 write_json_file,
58 write_string,
59 YoutubeDLHandler,
60 prepend_extension,
61 )
62 from .cache import Cache
63 from .extractor import get_info_extractor, gen_extractors
64 from .downloader import get_suitable_downloader
65 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
66 from .version import __version__
67
68
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Class-level defaults; __init__ replaces all of these with per-instance
    # values, so the mutable defaults here are never shared in practice.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
191
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options.

        params is the option dictionary described in the class docstring;
        it is stored as self.params and shared with registered extractors.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # logtostderr swaps the screen stream; errors always go to stderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Spawn an external bidi filter (bidiv, falling back to fribidi)
            # writing into a pty; _bidi_workaround() feeds text through it.
            try:
                import pty
                master, slave = pty.openpty()
                width = get_term_width()
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # errno 2 == ENOENT: neither executable was found on $PATH.
                if ose.errno == 2:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self._setup_opener()
248
249 def add_info_extractor(self, ie):
250 """Add an InfoExtractor object to the end of the list."""
251 self._ies.append(ie)
252 self._ies_instances[ie.ie_key()] = ie
253 ie.set_downloader(self)
254
255 def get_info_extractor(self, ie_key):
256 """
257 Get an instance of an IE with name ie_key, it will try to get one from
258 the _ies list, if there's no instance it will create a new one and add
259 it to the extractor list.
260 """
261 ie = self._ies_instances.get(ie_key)
262 if ie is None:
263 ie = get_info_extractor(ie_key)()
264 self.add_info_extractor(ie)
265 return ie
266
267 def add_default_info_extractors(self):
268 """
269 Add the InfoExtractors returned by gen_extractors to the end of the list
270 """
271 for ie in gen_extractors():
272 self.add_info_extractor(ie)
273
274 def add_post_processor(self, pp):
275 """Add a PostProcessor object to the end of the chain."""
276 self._pps.append(pp)
277 pp.set_downloader(self)
278
279 def add_progress_hook(self, ph):
280 """Add the progress hook (currently only for the file downloader)"""
281 self._progress_hooks.append(ph)
282
283 def _bidi_workaround(self, message):
284 if not hasattr(self, '_output_channel'):
285 return message
286
287 assert hasattr(self, '_output_process')
288 assert isinstance(message, compat_str)
289 line_count = message.count('\n') + 1
290 self._output_process.stdin.write((message + '\n').encode('utf-8'))
291 self._output_process.stdin.flush()
292 res = ''.join(self._output_channel.readline().decode('utf-8')
293 for _ in range(line_count))
294 return res[:-len('\n')]
295
296 def to_screen(self, message, skip_eol=False):
297 """Print message to stdout if not in quiet mode."""
298 return self.to_stdout(message, skip_eol, check_quiet=True)
299
300 def _write_string(self, s, out=None):
301 write_string(s, out=out, encoding=self.params.get('encoding'))
302
303 def to_stdout(self, message, skip_eol=False, check_quiet=False):
304 """Print message to stdout if not in quiet mode."""
305 if self.params.get('logger'):
306 self.params['logger'].debug(message)
307 elif not check_quiet or not self.params.get('quiet', False):
308 message = self._bidi_workaround(message)
309 terminator = ['\n', ''][skip_eol]
310 output = message + terminator
311
312 self._write_string(output, self._screen_file)
313
314 def to_stderr(self, message):
315 """Print message to stderr."""
316 assert isinstance(message, compat_str)
317 if self.params.get('logger'):
318 self.params['logger'].error(message)
319 else:
320 message = self._bidi_workaround(message)
321 output = message + '\n'
322 self._write_string(output, self._err_file)
323
324 def to_console_title(self, message):
325 if not self.params.get('consoletitle', False):
326 return
327 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
328 # c_wchar_p() might not be necessary if `message` is
329 # already of type unicode()
330 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
331 elif 'TERM' in os.environ:
332 self._write_string('\033]0;%s\007' % message, self._screen_file)
333
334 def save_console_title(self):
335 if not self.params.get('consoletitle', False):
336 return
337 if 'TERM' in os.environ:
338 # Save the title on stack
339 self._write_string('\033[22;0t', self._screen_file)
340
341 def restore_console_title(self):
342 if not self.params.get('consoletitle', False):
343 return
344 if 'TERM' in os.environ:
345 # Restore the title from stack
346 self._write_string('\033[23;0t', self._screen_file)
347
348 def __enter__(self):
349 self.save_console_title()
350 return self
351
352 def __exit__(self, *args):
353 self.restore_console_title()
354
355 if self.params.get('cookiefile') is not None:
356 self.cookiejar.save()
357
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Some wrapped exceptions carry the original exc_info
                    # (e.g. set by the extractor); include that traceback too.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the nested exc_info of a wrapping exception when present,
            # so DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: remember the failure in the process return code.
        self._download_retcode = 1
387
388 def report_warning(self, message):
389 '''
390 Print the message to stderr, it will be prefixed with 'WARNING:'
391 If stderr is a tty file the 'WARNING:' will be colored
392 '''
393 if self.params.get('logger') is not None:
394 self.params['logger'].warning(message)
395 else:
396 if self.params.get('no_warnings'):
397 return
398 if self._err_file.isatty() and os.name != 'nt':
399 _msg_header = '\033[0;33mWARNING:\033[0m'
400 else:
401 _msg_header = 'WARNING:'
402 warning_message = '%s %s' % (_msg_header, message)
403 self.to_stderr(warning_message)
404
405 def report_error(self, message, tb=None):
406 '''
407 Do the same as trouble, but prefixes the message with 'ERROR:', colored
408 in red if stderr is a tty file.
409 '''
410 if self._err_file.isatty() and os.name != 'nt':
411 _msg_header = '\033[0;31mERROR:\033[0m'
412 else:
413 _msg_header = 'ERROR:'
414 error_message = '%s %s' % (_msg_header, message)
415 self.trouble(error_message, tb)
416
417 def report_file_already_downloaded(self, file_name):
418 """Report file has already been fully downloaded."""
419 try:
420 self.to_screen('[download] %s has already been downloaded' % file_name)
421 except UnicodeEncodeError:
422 self.to_screen('[download] The file has already been downloaded')
423
424 def prepare_filename(self, info_dict):
425 """Generate the output filename."""
426 try:
427 template_dict = dict(info_dict)
428
429 template_dict['epoch'] = int(time.time())
430 autonumber_size = self.params.get('autonumber_size')
431 if autonumber_size is None:
432 autonumber_size = 5
433 autonumber_templ = '%0' + str(autonumber_size) + 'd'
434 template_dict['autonumber'] = autonumber_templ % self._num_downloads
435 if template_dict.get('playlist_index') is not None:
436 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
437 if template_dict.get('resolution') is None:
438 if template_dict.get('width') and template_dict.get('height'):
439 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
440 elif template_dict.get('height'):
441 template_dict['resolution'] = '%sp' % template_dict['height']
442 elif template_dict.get('width'):
443 template_dict['resolution'] = '?x%d' % template_dict['width']
444
445 sanitize = lambda k, v: sanitize_filename(
446 compat_str(v),
447 restricted=self.params.get('restrictfilenames'),
448 is_id=(k == 'id'))
449 template_dict = dict((k, sanitize(k, v))
450 for k, v in template_dict.items()
451 if v is not None)
452 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
453
454 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
455 tmpl = compat_expanduser(outtmpl)
456 filename = tmpl % template_dict
457 return filename
458 except ValueError as err:
459 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
460 return None
461
462 def _match_entry(self, info_dict):
463 """ Returns None iff the file should be downloaded """
464
465 video_title = info_dict.get('title', info_dict.get('id', 'video'))
466 if 'title' in info_dict:
467 # This can happen when we're just evaluating the playlist
468 title = info_dict['title']
469 matchtitle = self.params.get('matchtitle', False)
470 if matchtitle:
471 if not re.search(matchtitle, title, re.IGNORECASE):
472 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
473 rejecttitle = self.params.get('rejecttitle', False)
474 if rejecttitle:
475 if re.search(rejecttitle, title, re.IGNORECASE):
476 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
477 date = info_dict.get('upload_date', None)
478 if date is not None:
479 dateRange = self.params.get('daterange', DateRange())
480 if date not in dateRange:
481 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
482 view_count = info_dict.get('view_count', None)
483 if view_count is not None:
484 min_views = self.params.get('min_views')
485 if min_views is not None and view_count < min_views:
486 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
487 max_views = self.params.get('max_views')
488 if max_views is not None and view_count > max_views:
489 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
490 age_limit = self.params.get('age_limit')
491 if age_limit is not None:
492 actual_age_limit = info_dict.get('age_limit')
493 if actual_age_limit is None:
494 actual_age_limit = 0
495 if age_limit < actual_age_limit:
496 return 'Skipping "' + title + '" because it is age restricted'
497 if self.in_download_archive(info_dict):
498 return '%s has already been recorded in archive' % video_title
499 return None
500
501 @staticmethod
502 def add_extra_info(info_dict, extra_info):
503 '''Set the keys from extra_info in info dict if they are missing'''
504 for key, value in extra_info.items():
505 info_dict.setdefault(key, value)
506
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result.
        When ie_key is given, only that extractor is tried; otherwise the
        registered extractors are tried in order.  With process=False the raw
        extractor result is returned without resolving/downloading.
        '''

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: this is control flow for --max-downloads, not an error.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no extractor claimed the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
556
557 def add_default_extra_info(self, ie_result, ie, url):
558 self.add_extra_info(ie_result, {
559 'extractor': ie.IE_NAME,
560 'webpage_url': url,
561 'webpage_url_basename': url_basename(url),
562 'extractor_key': ie.ie_key(),
563 })
564
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' (default), 'url',
        'url_transparent', 'playlist' or 'compat_list'.
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # --flat-playlist: return the unresolved reference as-is.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge: drop the listed keys from the embedding result, then
                # take the embedded extractor's values for them when present.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            # Recurse to resolve whatever the merged result turned into.
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            else:
                # Lazily paginated playlist: only fetch the requested slice.
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Apply title/date/view-count/age/archive filters per entry.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            # Legacy list-of-dicts result: tag each entry, then process it.
            def _fixup(r):
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
689
690 def select_format(self, format_spec, available_formats):
691 if format_spec == 'best' or format_spec is None:
692 return available_formats[-1]
693 elif format_spec == 'worst':
694 return available_formats[0]
695 elif format_spec == 'bestaudio':
696 audio_formats = [
697 f for f in available_formats
698 if f.get('vcodec') == 'none']
699 if audio_formats:
700 return audio_formats[-1]
701 elif format_spec == 'worstaudio':
702 audio_formats = [
703 f for f in available_formats
704 if f.get('vcodec') == 'none']
705 if audio_formats:
706 return audio_formats[0]
707 elif format_spec == 'bestvideo':
708 video_formats = [
709 f for f in available_formats
710 if f.get('acodec') == 'none']
711 if video_formats:
712 return video_formats[-1]
713 elif format_spec == 'worstvideo':
714 video_formats = [
715 f for f in available_formats
716 if f.get('acodec') == 'none']
717 if video_formats:
718 return video_formats[0]
719 else:
720 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
721 if format_spec in extensions:
722 filter_f = lambda f: f['ext'] == format_spec
723 else:
724 filter_f = lambda f: f['format_id'] == format_spec
725 matches = list(filter(filter_f, available_formats))
726 if matches:
727 return matches[-1]
728 return None
729
    def process_video_result(self, info_dict, download=True):
        """Normalize a single 'video' result, select the requested format(s)
        and, when download is true, hand each selected format to
        process_info().  Returns the (mutated) info_dict."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            # Sort worst to best by (width, height, url).
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
            for t in thumbnails:
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            # Default thumbnail: the best (last) one.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Derive upload_date (YYYYMMDD) from the UNIX timestamp.
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # --format-limit: drop everything above the named format (inclusive cut).
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # Both halves resolved: downloads are merged later.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': rf,
                                'ext': formats_info[0]['ext'],
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
858
859 def process_info(self, info_dict):
860 """Process a single resolved IE result."""
861
862 assert info_dict.get('_type', 'video') == 'video'
863
864 max_downloads = self.params.get('max_downloads')
865 if max_downloads is not None:
866 if self._num_downloads >= int(max_downloads):
867 raise MaxDownloadsReached()
868
869 info_dict['fulltitle'] = info_dict['title']
870 if len(info_dict['title']) > 200:
871 info_dict['title'] = info_dict['title'][:197] + '...'
872
873 # Keep for backwards compatibility
874 info_dict['stitle'] = info_dict['title']
875
876 if 'format' not in info_dict:
877 info_dict['format'] = info_dict['ext']
878
879 reason = self._match_entry(info_dict)
880 if reason is not None:
881 self.to_screen('[download] ' + reason)
882 return
883
884 self._num_downloads += 1
885
886 filename = self.prepare_filename(info_dict)
887
888 # Forced printings
889 if self.params.get('forcetitle', False):
890 self.to_stdout(info_dict['fulltitle'])
891 if self.params.get('forceid', False):
892 self.to_stdout(info_dict['id'])
893 if self.params.get('forceurl', False):
894 # For RTMP URLs, also include the playpath
895 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
896 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
897 self.to_stdout(info_dict['thumbnail'])
898 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
899 self.to_stdout(info_dict['description'])
900 if self.params.get('forcefilename', False) and filename is not None:
901 self.to_stdout(filename)
902 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
903 self.to_stdout(formatSeconds(info_dict['duration']))
904 if self.params.get('forceformat', False):
905 self.to_stdout(info_dict['format'])
906 if self.params.get('forcejson', False):
907 info_dict['_filename'] = filename
908 self.to_stdout(json.dumps(info_dict))
909 if self.params.get('dump_single_json', False):
910 info_dict['_filename'] = filename
911
912 # Do nothing else if in simulate mode
913 if self.params.get('simulate', False):
914 return
915
916 if filename is None:
917 return
918
919 try:
920 dn = os.path.dirname(encodeFilename(filename))
921 if dn and not os.path.exists(dn):
922 os.makedirs(dn)
923 except (OSError, IOError) as err:
924 self.report_error('unable to create directory ' + compat_str(err))
925 return
926
927 if self.params.get('writedescription', False):
928 descfn = filename + '.description'
929 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
930 self.to_screen('[info] Video description is already present')
931 else:
932 try:
933 self.to_screen('[info] Writing video description to: ' + descfn)
934 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
935 descfile.write(info_dict['description'])
936 except (KeyError, TypeError):
937 self.report_warning('There\'s no description to write.')
938 except (OSError, IOError):
939 self.report_error('Cannot write description file ' + descfn)
940 return
941
942 if self.params.get('writeannotations', False):
943 annofn = filename + '.annotations.xml'
944 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
945 self.to_screen('[info] Video annotations are already present')
946 else:
947 try:
948 self.to_screen('[info] Writing video annotations to: ' + annofn)
949 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
950 annofile.write(info_dict['annotations'])
951 except (KeyError, TypeError):
952 self.report_warning('There are no annotations to write.')
953 except (OSError, IOError):
954 self.report_error('Cannot write annotations file: ' + annofn)
955 return
956
957 subtitles_are_requested = any([self.params.get('writesubtitles', False),
958 self.params.get('writeautomaticsub')])
959
960 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
961 # subtitles download errors are already managed as troubles in relevant IE
962 # that way it will silently go on when used with unsupporting IE
963 subtitles = info_dict['subtitles']
964 sub_format = self.params.get('subtitlesformat', 'srt')
965 for sub_lang in subtitles.keys():
966 sub = subtitles[sub_lang]
967 if sub is None:
968 continue
969 try:
970 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
971 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
972 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
973 else:
974 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
975 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
976 subfile.write(sub)
977 except (OSError, IOError):
978 self.report_error('Cannot write subtitles file ' + sub_filename)
979 return
980
981 if self.params.get('writeinfojson', False):
982 infofn = os.path.splitext(filename)[0] + '.info.json'
983 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
984 self.to_screen('[info] Video description metadata is already present')
985 else:
986 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
987 try:
988 write_json_file(info_dict, encodeFilename(infofn))
989 except (OSError, IOError):
990 self.report_error('Cannot write metadata to JSON file ' + infofn)
991 return
992
993 if self.params.get('writethumbnail', False):
994 if info_dict.get('thumbnail') is not None:
995 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
996 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
997 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
998 self.to_screen('[%s] %s: Thumbnail is already present' %
999 (info_dict['extractor'], info_dict['id']))
1000 else:
1001 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1002 (info_dict['extractor'], info_dict['id']))
1003 try:
1004 uf = self.urlopen(info_dict['thumbnail'])
1005 with open(thumb_filename, 'wb') as thumbf:
1006 shutil.copyfileobj(uf, thumbf)
1007 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1008 (info_dict['extractor'], info_dict['id'], thumb_filename))
1009 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1010 self.report_warning('Unable to download thumbnail "%s": %s' %
1011 (info_dict['thumbnail'], compat_str(err)))
1012
1013 if not self.params.get('skip_download', False):
1014 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1015 success = True
1016 else:
1017 try:
1018 def dl(name, info):
1019 fd = get_suitable_downloader(info)(self, self.params)
1020 for ph in self._progress_hooks:
1021 fd.add_progress_hook(ph)
1022 if self.params.get('verbose'):
1023 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1024 return fd.download(name, info)
1025 if info_dict.get('requested_formats') is not None:
1026 downloaded = []
1027 success = True
1028 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1029 if not merger._executable:
1030 postprocessors = []
1031 self.report_warning('You have requested multiple '
1032 'formats but ffmpeg or avconv are not installed.'
1033 ' The formats won\'t be merged')
1034 else:
1035 postprocessors = [merger]
1036 for f in info_dict['requested_formats']:
1037 new_info = dict(info_dict)
1038 new_info.update(f)
1039 fname = self.prepare_filename(new_info)
1040 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1041 downloaded.append(fname)
1042 partial_success = dl(fname, new_info)
1043 success = success and partial_success
1044 info_dict['__postprocessors'] = postprocessors
1045 info_dict['__files_to_merge'] = downloaded
1046 else:
1047 # Just a single file
1048 success = dl(filename, info_dict)
1049 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1050 self.report_error('unable to download video data: %s' % str(err))
1051 return
1052 except (OSError, IOError) as err:
1053 raise UnavailableVideoError(err)
1054 except (ContentTooShortError, ) as err:
1055 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1056 return
1057
1058 if success:
1059 try:
1060 self.post_process(filename, info_dict)
1061 except (PostProcessingError) as err:
1062 self.report_error('postprocessing: %s' % str(err))
1063 return
1064
1065 self.record_download_archive(info_dict)
1066
1067 def download(self, url_list):
1068 """Download a given list of URLs."""
1069 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1070 if (len(url_list) > 1 and
1071 '%' not in outtmpl
1072 and self.params.get('max_downloads') != 1):
1073 raise SameFileError(outtmpl)
1074
1075 for url in url_list:
1076 try:
1077 #It also downloads the videos
1078 res = self.extract_info(url)
1079 except UnavailableVideoError:
1080 self.report_error('unable to download video')
1081 except MaxDownloadsReached:
1082 self.to_screen('[info] Maximum number of downloaded files reached.')
1083 raise
1084 else:
1085 if self.params.get('dump_single_json', False):
1086 self.to_stdout(json.dumps(res))
1087
1088 return self._download_retcode
1089
1090 def download_with_info_file(self, info_filename):
1091 with io.open(info_filename, 'r', encoding='utf-8') as f:
1092 info = json.load(f)
1093 try:
1094 self.process_ie_result(info, download=True)
1095 except DownloadError:
1096 webpage_url = info.get('webpage_url')
1097 if webpage_url is not None:
1098 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1099 return self.download([webpage_url])
1100 else:
1101 raise
1102 return self._download_retcode
1103
1104 def post_process(self, filename, ie_info):
1105 """Run all the postprocessors on the given file."""
1106 info = dict(ie_info)
1107 info['filepath'] = filename
1108 keep_video = None
1109 pps_chain = []
1110 if ie_info.get('__postprocessors') is not None:
1111 pps_chain.extend(ie_info['__postprocessors'])
1112 pps_chain.extend(self._pps)
1113 for pp in pps_chain:
1114 try:
1115 keep_video_wish, new_info = pp.run(info)
1116 if keep_video_wish is not None:
1117 if keep_video_wish:
1118 keep_video = keep_video_wish
1119 elif keep_video is None:
1120 # No clear decision yet, let IE decide
1121 keep_video = keep_video_wish
1122 except PostProcessingError as e:
1123 self.report_error(e.msg)
1124 if keep_video is False and not self.params.get('keepvideo', False):
1125 try:
1126 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1127 os.remove(encodeFilename(filename))
1128 except (IOError, OSError):
1129 self.report_warning('Unable to remove downloaded video file')
1130
1131 def _make_archive_id(self, info_dict):
1132 # Future-proof against any change in case
1133 # and backwards compatibility with prior versions
1134 extractor = info_dict.get('extractor_key')
1135 if extractor is None:
1136 if 'id' in info_dict:
1137 extractor = info_dict.get('ie_key') # key in a playlist
1138 if extractor is None:
1139 return None # Incomplete video information
1140 return extractor.lower() + ' ' + info_dict['id']
1141
1142 def in_download_archive(self, info_dict):
1143 fn = self.params.get('download_archive')
1144 if fn is None:
1145 return False
1146
1147 vid_id = self._make_archive_id(info_dict)
1148 if vid_id is None:
1149 return False # Incomplete video information
1150
1151 try:
1152 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1153 for line in archive_file:
1154 if line.strip() == vid_id:
1155 return True
1156 except IOError as ioe:
1157 if ioe.errno != errno.ENOENT:
1158 raise
1159 return False
1160
1161 def record_download_archive(self, info_dict):
1162 fn = self.params.get('download_archive')
1163 if fn is None:
1164 return
1165 vid_id = self._make_archive_id(info_dict)
1166 assert vid_id
1167 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1168 archive_file.write(vid_id + '\n')
1169
1170 @staticmethod
1171 def format_resolution(format, default='unknown'):
1172 if format.get('vcodec') == 'none':
1173 return 'audio only'
1174 if format.get('resolution') is not None:
1175 return format['resolution']
1176 if format.get('height') is not None:
1177 if format.get('width') is not None:
1178 res = '%sx%s' % (format['width'], format['height'])
1179 else:
1180 res = '%sp' % format['height']
1181 elif format.get('width') is not None:
1182 res = '?x%d' % format['width']
1183 else:
1184 res = default
1185 return res
1186
1187 def _format_note(self, fdict):
1188 res = ''
1189 if fdict.get('ext') in ['f4f', 'f4m']:
1190 res += '(unsupported) '
1191 if fdict.get('format_note') is not None:
1192 res += fdict['format_note'] + ' '
1193 if fdict.get('tbr') is not None:
1194 res += '%4dk ' % fdict['tbr']
1195 if fdict.get('container') is not None:
1196 if res:
1197 res += ', '
1198 res += '%s container' % fdict['container']
1199 if (fdict.get('vcodec') is not None and
1200 fdict.get('vcodec') != 'none'):
1201 if res:
1202 res += ', '
1203 res += fdict['vcodec']
1204 if fdict.get('vbr') is not None:
1205 res += '@'
1206 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1207 res += 'video@'
1208 if fdict.get('vbr') is not None:
1209 res += '%4dk' % fdict['vbr']
1210 if fdict.get('acodec') is not None:
1211 if res:
1212 res += ', '
1213 if fdict['acodec'] == 'none':
1214 res += 'video only'
1215 else:
1216 res += '%-5s' % fdict['acodec']
1217 elif fdict.get('abr') is not None:
1218 if res:
1219 res += ', '
1220 res += 'audio'
1221 if fdict.get('abr') is not None:
1222 res += '@%3dk' % fdict['abr']
1223 if fdict.get('asr') is not None:
1224 res += ' (%5dHz)' % fdict['asr']
1225 if fdict.get('filesize') is not None:
1226 if res:
1227 res += ', '
1228 res += format_bytes(fdict['filesize'])
1229 elif fdict.get('filesize_approx') is not None:
1230 if res:
1231 res += ', '
1232 res += '~' + format_bytes(fdict['filesize_approx'])
1233 return res
1234
1235 def list_formats(self, info_dict):
1236 def line(format, idlen=20):
1237 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1238 format['format_id'],
1239 format['ext'],
1240 self.format_resolution(format),
1241 self._format_note(format),
1242 ))
1243
1244 formats = info_dict.get('formats', [info_dict])
1245 idlen = max(len('format code'),
1246 max(len(f['format_id']) for f in formats))
1247 formats_s = [line(f, idlen) for f in formats]
1248 if len(formats) > 1:
1249 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1250 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1251
1252 header_line = line({
1253 'format_id': 'format code', 'ext': 'extension',
1254 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1255 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1256 (info_dict['id'], header_line, '\n'.join(formats_s)))
1257
1258 def urlopen(self, req):
1259 """ Start an HTTP download """
1260
1261 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1262 # always respected by websites, some tend to give out URLs with non percent-encoded
1263 # non-ASCII characters (see telemb.py, ard.py [#3412])
1264 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1265 # To work around aforementioned issue we will replace request's original URL with
1266 # percent-encoded one
1267 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1268 url = req if req_is_string else req.get_full_url()
1269 url_escaped = escape_url(url)
1270
1271 # Substitute URL if any change after escaping
1272 if url != url_escaped:
1273 if req_is_string:
1274 req = url_escaped
1275 else:
1276 req = compat_urllib_request.Request(
1277 url_escaped, data=req.data, headers=req.headers,
1278 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1279
1280 return self._opener.open(req, timeout=self._socket_timeout)
1281
1282 def print_debug_header(self):
1283 if not self.params.get('verbose'):
1284 return
1285
1286 if type('') is not compat_str:
1287 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1288 self.report_warning(
1289 'Your Python is broken! Update to a newer and supported version')
1290
1291 encoding_str = (
1292 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1293 locale.getpreferredencoding(),
1294 sys.getfilesystemencoding(),
1295 sys.stdout.encoding,
1296 self.get_encoding()))
1297 write_string(encoding_str, encoding=None)
1298
1299 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1300 try:
1301 sp = subprocess.Popen(
1302 ['git', 'rev-parse', '--short', 'HEAD'],
1303 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1304 cwd=os.path.dirname(os.path.abspath(__file__)))
1305 out, err = sp.communicate()
1306 out = out.decode().strip()
1307 if re.match('[0-9a-f]+', out):
1308 self._write_string('[debug] Git HEAD: ' + out + '\n')
1309 except:
1310 try:
1311 sys.exc_clear()
1312 except:
1313 pass
1314 self._write_string('[debug] Python version %s - %s\n' % (
1315 platform.python_version(), platform_name()))
1316
1317 exe_versions = FFmpegPostProcessor.get_versions()
1318 exe_str = ', '.join(
1319 '%s %s' % (exe, v)
1320 for exe, v in sorted(exe_versions.items())
1321 if v
1322 )
1323 if not exe_str:
1324 exe_str = 'none'
1325 self._write_string('[debug] exe versions: %s\n' % exe_str)
1326
1327 proxy_map = {}
1328 for handler in self._opener.handlers:
1329 if hasattr(handler, 'proxies'):
1330 proxy_map.update(handler.proxies)
1331 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1332
1333 def _setup_opener(self):
1334 timeout_val = self.params.get('socket_timeout')
1335 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1336
1337 opts_cookiefile = self.params.get('cookiefile')
1338 opts_proxy = self.params.get('proxy')
1339
1340 if opts_cookiefile is None:
1341 self.cookiejar = compat_cookiejar.CookieJar()
1342 else:
1343 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1344 opts_cookiefile)
1345 if os.access(opts_cookiefile, os.R_OK):
1346 self.cookiejar.load()
1347
1348 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1349 self.cookiejar)
1350 if opts_proxy is not None:
1351 if opts_proxy == '':
1352 proxies = {}
1353 else:
1354 proxies = {'http': opts_proxy, 'https': opts_proxy}
1355 else:
1356 proxies = compat_urllib_request.getproxies()
1357 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1358 if 'http' in proxies and 'https' not in proxies:
1359 proxies['https'] = proxies['http']
1360 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1361
1362 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1363 https_handler = make_HTTPS_handler(
1364 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1365 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1366 opener = compat_urllib_request.build_opener(
1367 https_handler, proxy_handler, cookie_processor, ydlh)
1368 # Delete the default user-agent header, which would otherwise apply in
1369 # cases where our custom HTTP handler doesn't come into play
1370 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1371 opener.addheaders = []
1372 self._opener = opener
1373
1374 def encode(self, s):
1375 if isinstance(s, bytes):
1376 return s # Already encoded
1377
1378 try:
1379 return s.encode(self.get_encoding())
1380 except UnicodeEncodeError as err:
1381 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1382 raise
1383
1384 def get_encoding(self):
1385 encoding = self.params.get('encoding')
1386 if encoding is None:
1387 encoding = preferredencoding()
1388 return encoding