]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Merge remote-tracking branch '5moufl/behindkink'
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import json
11 import locale
12 import os
13 import platform
14 import re
15 import shutil
16 import subprocess
17 import socket
18 import sys
19 import time
20 import traceback
21
22 if os.name == 'nt':
23 import ctypes
24
25 from .utils import (
26 compat_cookiejar,
27 compat_http_client,
28 compat_str,
29 compat_urllib_error,
30 compat_urllib_request,
31 escape_url,
32 ContentTooShortError,
33 date_from_str,
34 DateRange,
35 DEFAULT_OUTTMPL,
36 determine_ext,
37 DownloadError,
38 encodeFilename,
39 ExtractorError,
40 format_bytes,
41 formatSeconds,
42 get_term_width,
43 locked_file,
44 make_HTTPS_handler,
45 MaxDownloadsReached,
46 PagedList,
47 PostProcessingError,
48 platform_name,
49 preferredencoding,
50 SameFileError,
51 sanitize_filename,
52 subtitles_filename,
53 takewhile_inclusive,
54 UnavailableVideoError,
55 url_basename,
56 write_json_file,
57 write_string,
58 YoutubeDLHandler,
59 prepend_extension,
60 )
61 from .cache import Cache
62 from .extractor import get_info_extractor, gen_extractors
63 from .downloader import get_suitable_downloader
64 from .postprocessor import FFmpegMergerPP
65 from .version import __version__
66
67
68 class YoutubeDL(object):
69 """YoutubeDL class.
70
71 YoutubeDL objects are the ones responsible of downloading the
72 actual video file and writing it to disk if the user has requested
73 it, among some other tasks. In most cases there should be one per
74 program. As, given a video URL, the downloader doesn't know how to
75     extract all the needed information, a task that InfoExtractors do, it
76 has to pass the URL to one of them.
77
78 For this, YoutubeDL objects have a method that allows
79 InfoExtractors to be registered in a given order. When it is passed
80 a URL, the YoutubeDL object handles it to the first InfoExtractor it
81 finds that reports being able to handle it. The InfoExtractor extracts
82 all the information about the video or videos the URL refers to, and
83 YoutubeDL process the extracted information, possibly using a File
84 Downloader to download the video.
85
86 YoutubeDL objects accept a lot of parameters. In order not to saturate
87 the object constructor with arguments, it receives a dictionary of
88 options instead. These options are available through the params
89 attribute for the InfoExtractors to use. The YoutubeDL also
90 registers itself as the downloader in charge for the InfoExtractors
91 that are added to it, so this is a "mutual registration".
92
93 Available options:
94
95 username: Username for authentication purposes.
96 password: Password for authentication purposes.
97     videopassword:     Password for accessing a video.
98 usenetrc: Use netrc for authentication instead.
99 verbose: Print additional info to stdout.
100 quiet: Do not print messages to stdout.
101 no_warnings: Do not print out anything for warnings.
102 forceurl: Force printing final URL.
103 forcetitle: Force printing title.
104 forceid: Force printing ID.
105 forcethumbnail: Force printing thumbnail URL.
106 forcedescription: Force printing description.
107 forcefilename: Force printing final filename.
108 forceduration: Force printing duration.
109 forcejson: Force printing info_dict as JSON.
110 simulate: Do not download the video files.
111 format: Video format code.
112 format_limit: Highest quality format to try.
113 outtmpl: Template for output names.
114 restrictfilenames: Do not allow "&" and spaces in file names
115 ignoreerrors: Do not stop on download errors.
116 nooverwrites: Prevent overwriting files.
117 playliststart: Playlist item to start at.
118 playlistend: Playlist item to end at.
119 matchtitle: Download only matching titles.
120 rejecttitle: Reject downloads for matching titles.
121 logger: Log messages to a logging.Logger instance.
122 logtostderr: Log messages to stderr instead of stdout.
123 writedescription: Write the video description to a .description file
124 writeinfojson: Write the video description to a .info.json file
125 writeannotations: Write the video annotations to a .annotations.xml file
126 writethumbnail: Write the thumbnail image to a file
127 writesubtitles: Write the video subtitles to a file
128 writeautomaticsub: Write the automatic subtitles to a file
129 allsubtitles: Downloads all the subtitles of the video
130 (requires writesubtitles or writeautomaticsub)
131 listsubtitles: Lists all available subtitles for the video
132 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
133 subtitleslangs: List of languages of the subtitles to download
134 keepvideo: Keep the video file after post-processing
135 daterange: A DateRange object, download only if the upload_date is in the range.
136 skip_download: Skip the actual download of the video file
137 cachedir: Location of the cache files in the filesystem.
138 False to disable filesystem cache.
139 noplaylist: Download single video instead of a playlist if in doubt.
140 age_limit: An integer representing the user's age in years.
141 Unsuitable videos for the given age are skipped.
142 min_views: An integer representing the minimum view count the video
143 must have in order to not be skipped.
144 Videos without view count information are always
145 downloaded. None for no limit.
146 max_views: An integer representing the maximum view count.
147 Videos that are more popular than that are not
148 downloaded.
149 Videos without view count information are always
150 downloaded. None for no limit.
151 download_archive: File name of a file where all downloads are recorded.
152 Videos already present in the file are not downloaded
153 again.
154 cookiefile: File name where cookies should be read from and dumped to.
155 nocheckcertificate:Do not verify SSL certificates
156 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
157 At the moment, this is only supported by YouTube.
158 proxy: URL of the proxy server to use
159 socket_timeout: Time to wait for unresponsive hosts, in seconds
160 bidi_workaround: Work around buggy terminals without bidirectional text
161                        support, using fribidi
162 debug_printtraffic:Print out sent and received HTTP traffic
163 include_ads: Download ads as well
164 default_search: Prepend this string if an input url is not valid.
165 'auto' for elaborate guessing
166 encoding: Use this encoding instead of the system-specified.
167 extract_flat: Do not resolve URLs, return the immediate result.
168
169 The following parameters are not used by YoutubeDL itself, they are used by
170 the FileDownloader:
171 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
172 noresizebuffer, retries, continuedl, noprogress, consoletitle
173
174 The following options are used by the post processors:
175 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
176 otherwise prefer avconv.
177 exec_cmd: Arbitrary command to run after downloading
178 """
179
180 params = None
181 _ies = []
182 _pps = []
183 _download_retcode = None
184 _num_downloads = None
185 _screen_file = None
186
187 def __init__(self, params=None):
188 """Create a FileDownloader object with the given options."""
189 if params is None:
190 params = {}
191 self._ies = []
192 self._ies_instances = {}
193 self._pps = []
194 self._progress_hooks = []
195 self._download_retcode = 0
196 self._num_downloads = 0
197 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
198 self._err_file = sys.stderr
199 self.params = params
200 self.cache = Cache(self)
201
202 if params.get('bidi_workaround', False):
203 try:
204 import pty
205 master, slave = pty.openpty()
206 width = get_term_width()
207 if width is None:
208 width_args = []
209 else:
210 width_args = ['-w', str(width)]
211 sp_kwargs = dict(
212 stdin=subprocess.PIPE,
213 stdout=slave,
214 stderr=self._err_file)
215 try:
216 self._output_process = subprocess.Popen(
217 ['bidiv'] + width_args, **sp_kwargs
218 )
219 except OSError:
220 self._output_process = subprocess.Popen(
221 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
222 self._output_channel = os.fdopen(master, 'rb')
223 except OSError as ose:
224 if ose.errno == 2:
225 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
226 else:
227 raise
228
229 if (sys.version_info >= (3,) and sys.platform != 'win32' and
230 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
231 and not params['restrictfilenames']):
232 # On Python 3, the Unicode filesystem API will throw errors (#1474)
233 self.report_warning(
234 'Assuming --restrict-filenames since file system encoding '
235 'cannot encode all charactes. '
236 'Set the LC_ALL environment variable to fix this.')
237 self.params['restrictfilenames'] = True
238
239 if '%(stitle)s' in self.params.get('outtmpl', ''):
240 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
241
242 self._setup_opener()
243
244 def add_info_extractor(self, ie):
245 """Add an InfoExtractor object to the end of the list."""
246 self._ies.append(ie)
247 self._ies_instances[ie.ie_key()] = ie
248 ie.set_downloader(self)
249
250 def get_info_extractor(self, ie_key):
251 """
252 Get an instance of an IE with name ie_key, it will try to get one from
253 the _ies list, if there's no instance it will create a new one and add
254 it to the extractor list.
255 """
256 ie = self._ies_instances.get(ie_key)
257 if ie is None:
258 ie = get_info_extractor(ie_key)()
259 self.add_info_extractor(ie)
260 return ie
261
262 def add_default_info_extractors(self):
263 """
264 Add the InfoExtractors returned by gen_extractors to the end of the list
265 """
266 for ie in gen_extractors():
267 self.add_info_extractor(ie)
268
269 def add_post_processor(self, pp):
270 """Add a PostProcessor object to the end of the chain."""
271 self._pps.append(pp)
272 pp.set_downloader(self)
273
274 def add_progress_hook(self, ph):
275 """Add the progress hook (currently only for the file downloader)"""
276 self._progress_hooks.append(ph)
277
278 def _bidi_workaround(self, message):
279 if not hasattr(self, '_output_channel'):
280 return message
281
282 assert hasattr(self, '_output_process')
283 assert isinstance(message, compat_str)
284 line_count = message.count('\n') + 1
285 self._output_process.stdin.write((message + '\n').encode('utf-8'))
286 self._output_process.stdin.flush()
287 res = ''.join(self._output_channel.readline().decode('utf-8')
288 for _ in range(line_count))
289 return res[:-len('\n')]
290
291 def to_screen(self, message, skip_eol=False):
292 """Print message to stdout if not in quiet mode."""
293 return self.to_stdout(message, skip_eol, check_quiet=True)
294
295 def _write_string(self, s, out=None):
296 write_string(s, out=out, encoding=self.params.get('encoding'))
297
298 def to_stdout(self, message, skip_eol=False, check_quiet=False):
299 """Print message to stdout if not in quiet mode."""
300 if self.params.get('logger'):
301 self.params['logger'].debug(message)
302 elif not check_quiet or not self.params.get('quiet', False):
303 message = self._bidi_workaround(message)
304 terminator = ['\n', ''][skip_eol]
305 output = message + terminator
306
307 self._write_string(output, self._screen_file)
308
309 def to_stderr(self, message):
310 """Print message to stderr."""
311 assert isinstance(message, compat_str)
312 if self.params.get('logger'):
313 self.params['logger'].error(message)
314 else:
315 message = self._bidi_workaround(message)
316 output = message + '\n'
317 self._write_string(output, self._err_file)
318
319 def to_console_title(self, message):
320 if not self.params.get('consoletitle', False):
321 return
322 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
323 # c_wchar_p() might not be necessary if `message` is
324 # already of type unicode()
325 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
326 elif 'TERM' in os.environ:
327 self._write_string('\033]0;%s\007' % message, self._screen_file)
328
329 def save_console_title(self):
330 if not self.params.get('consoletitle', False):
331 return
332 if 'TERM' in os.environ:
333 # Save the title on stack
334 self._write_string('\033[22;0t', self._screen_file)
335
336 def restore_console_title(self):
337 if not self.params.get('consoletitle', False):
338 return
339 if 'TERM' in os.environ:
340 # Restore the title from stack
341 self._write_string('\033[23;0t', self._screen_file)
342
343 def __enter__(self):
344 self.save_console_title()
345 return self
346
347 def __exit__(self, *args):
348 self.restore_console_title()
349
350 if self.params.get('cookiefile') is not None:
351 self.cookiejar.save()
352
353 def trouble(self, message=None, tb=None):
354 """Determine action to take when a download problem appears.
355
356 Depending on if the downloader has been configured to ignore
357 download errors or not, this method may throw an exception or
358 not when errors are found, after printing the message.
359
360 tb, if given, is additional traceback information.
361 """
362 if message is not None:
363 self.to_stderr(message)
364 if self.params.get('verbose'):
365 if tb is None:
366 if sys.exc_info()[0]: # if .trouble has been called from an except block
367 tb = ''
368 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
369 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
370 tb += compat_str(traceback.format_exc())
371 else:
372 tb_data = traceback.format_list(traceback.extract_stack())
373 tb = ''.join(tb_data)
374 self.to_stderr(tb)
375 if not self.params.get('ignoreerrors', False):
376 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
377 exc_info = sys.exc_info()[1].exc_info
378 else:
379 exc_info = sys.exc_info()
380 raise DownloadError(message, exc_info)
381 self._download_retcode = 1
382
383 def report_warning(self, message):
384 '''
385 Print the message to stderr, it will be prefixed with 'WARNING:'
386 If stderr is a tty file the 'WARNING:' will be colored
387 '''
388 if self.params.get('logger') is not None:
389 self.params['logger'].warning(message)
390 else:
391 if self.params.get('no_warnings'):
392 return
393 if self._err_file.isatty() and os.name != 'nt':
394 _msg_header = '\033[0;33mWARNING:\033[0m'
395 else:
396 _msg_header = 'WARNING:'
397 warning_message = '%s %s' % (_msg_header, message)
398 self.to_stderr(warning_message)
399
400 def report_error(self, message, tb=None):
401 '''
402 Do the same as trouble, but prefixes the message with 'ERROR:', colored
403 in red if stderr is a tty file.
404 '''
405 if self._err_file.isatty() and os.name != 'nt':
406 _msg_header = '\033[0;31mERROR:\033[0m'
407 else:
408 _msg_header = 'ERROR:'
409 error_message = '%s %s' % (_msg_header, message)
410 self.trouble(error_message, tb)
411
412 def report_file_already_downloaded(self, file_name):
413 """Report file has already been fully downloaded."""
414 try:
415 self.to_screen('[download] %s has already been downloaded' % file_name)
416 except UnicodeEncodeError:
417 self.to_screen('[download] The file has already been downloaded')
418
419 def prepare_filename(self, info_dict):
420 """Generate the output filename."""
421 try:
422 template_dict = dict(info_dict)
423
424 template_dict['epoch'] = int(time.time())
425 autonumber_size = self.params.get('autonumber_size')
426 if autonumber_size is None:
427 autonumber_size = 5
428 autonumber_templ = '%0' + str(autonumber_size) + 'd'
429 template_dict['autonumber'] = autonumber_templ % self._num_downloads
430 if template_dict.get('playlist_index') is not None:
431 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
432 if template_dict.get('resolution') is None:
433 if template_dict.get('width') and template_dict.get('height'):
434 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
435 elif template_dict.get('height'):
436 template_dict['resolution'] = '%sp' % template_dict['height']
437 elif template_dict.get('width'):
438 template_dict['resolution'] = '?x%d' % template_dict['width']
439
440 sanitize = lambda k, v: sanitize_filename(
441 compat_str(v),
442 restricted=self.params.get('restrictfilenames'),
443 is_id=(k == 'id'))
444 template_dict = dict((k, sanitize(k, v))
445 for k, v in template_dict.items()
446 if v is not None)
447 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
448
449 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
450 tmpl = os.path.expanduser(outtmpl)
451 filename = tmpl % template_dict
452 return filename
453 except ValueError as err:
454 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
455 return None
456
457 def _match_entry(self, info_dict):
458 """ Returns None iff the file should be downloaded """
459
460 video_title = info_dict.get('title', info_dict.get('id', 'video'))
461 if 'title' in info_dict:
462 # This can happen when we're just evaluating the playlist
463 title = info_dict['title']
464 matchtitle = self.params.get('matchtitle', False)
465 if matchtitle:
466 if not re.search(matchtitle, title, re.IGNORECASE):
467 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
468 rejecttitle = self.params.get('rejecttitle', False)
469 if rejecttitle:
470 if re.search(rejecttitle, title, re.IGNORECASE):
471 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
472 date = info_dict.get('upload_date', None)
473 if date is not None:
474 dateRange = self.params.get('daterange', DateRange())
475 if date not in dateRange:
476 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
477 view_count = info_dict.get('view_count', None)
478 if view_count is not None:
479 min_views = self.params.get('min_views')
480 if min_views is not None and view_count < min_views:
481 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
482 max_views = self.params.get('max_views')
483 if max_views is not None and view_count > max_views:
484 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
485 age_limit = self.params.get('age_limit')
486 if age_limit is not None:
487 actual_age_limit = info_dict.get('age_limit')
488 if actual_age_limit is None:
489 actual_age_limit = 0
490 if age_limit < actual_age_limit:
491 return 'Skipping "' + title + '" because it is age restricted'
492 if self.in_download_archive(info_dict):
493 return '%s has already been recorded in archive' % video_title
494 return None
495
496 @staticmethod
497 def add_extra_info(info_dict, extra_info):
498 '''Set the keys from extra_info in info dict if they are missing'''
499 for key, value in extra_info.items():
500 info_dict.setdefault(key, value)
501
502 def extract_info(self, url, download=True, ie_key=None, extra_info={},
503 process=True):
504 '''
505 Returns a list with a dictionary for each video we find.
506 If 'download', also downloads the videos.
507 extra_info is a dict containing the extra values to add to each result
508 '''
509
510 if ie_key:
511 ies = [self.get_info_extractor(ie_key)]
512 else:
513 ies = self._ies
514
515 for ie in ies:
516 if not ie.suitable(url):
517 continue
518
519 if not ie.working():
520 self.report_warning('The program functionality for this site has been marked as broken, '
521 'and will probably not work.')
522
523 try:
524 ie_result = ie.extract(url)
525 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
526 break
527 if isinstance(ie_result, list):
528 # Backwards compatibility: old IE result format
529 ie_result = {
530 '_type': 'compat_list',
531 'entries': ie_result,
532 }
533 self.add_default_extra_info(ie_result, ie, url)
534 if process:
535 return self.process_ie_result(ie_result, download, extra_info)
536 else:
537 return ie_result
538 except ExtractorError as de: # An error we somewhat expected
539 self.report_error(compat_str(de), de.format_traceback())
540 break
541 except MaxDownloadsReached:
542 raise
543 except Exception as e:
544 if self.params.get('ignoreerrors', False):
545 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
546 break
547 else:
548 raise
549 else:
550 self.report_error('no suitable InfoExtractor for URL %s' % url)
551
552 def add_default_extra_info(self, ie_result, ie, url):
553 self.add_extra_info(ie_result, {
554 'extractor': ie.IE_NAME,
555 'webpage_url': url,
556 'webpage_url_basename': url_basename(url),
557 'extractor_key': ie.ie_key(),
558 })
559
560 def process_ie_result(self, ie_result, download=True, extra_info={}):
561 """
562 Take the result of the ie(may be modified) and resolve all unresolved
563 references (URLs, playlist items).
564
565 It will also download the videos if 'download'.
566 Returns the resolved ie_result.
567 """
568
569 result_type = ie_result.get('_type', 'video')
570
571 if self.params.get('extract_flat', False):
572 if result_type in ('url', 'url_transparent'):
573 return ie_result
574
575 if result_type == 'video':
576 self.add_extra_info(ie_result, extra_info)
577 return self.process_video_result(ie_result, download=download)
578 elif result_type == 'url':
579 # We have to add extra_info to the results because it may be
580 # contained in a playlist
581 return self.extract_info(ie_result['url'],
582 download,
583 ie_key=ie_result.get('ie_key'),
584 extra_info=extra_info)
585 elif result_type == 'url_transparent':
586 # Use the information from the embedding page
587 info = self.extract_info(
588 ie_result['url'], ie_key=ie_result.get('ie_key'),
589 extra_info=extra_info, download=False, process=False)
590
591 def make_result(embedded_info):
592 new_result = ie_result.copy()
593 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
594 'entries', 'ie_key', 'duration',
595 'subtitles', 'annotations', 'format',
596 'thumbnail', 'thumbnails'):
597 if f in new_result:
598 del new_result[f]
599 if f in embedded_info:
600 new_result[f] = embedded_info[f]
601 return new_result
602 new_result = make_result(info)
603
604 assert new_result.get('_type') != 'url_transparent'
605 if new_result.get('_type') == 'compat_list':
606 new_result['entries'] = [
607 make_result(e) for e in new_result['entries']]
608
609 return self.process_ie_result(
610 new_result, download=download, extra_info=extra_info)
611 elif result_type == 'playlist':
612 # We process each entry in the playlist
613 playlist = ie_result.get('title', None) or ie_result.get('id', None)
614 self.to_screen('[download] Downloading playlist: %s' % playlist)
615
616 playlist_results = []
617
618 playliststart = self.params.get('playliststart', 1) - 1
619 playlistend = self.params.get('playlistend', None)
620 # For backwards compatibility, interpret -1 as whole list
621 if playlistend == -1:
622 playlistend = None
623
624 if isinstance(ie_result['entries'], list):
625 n_all_entries = len(ie_result['entries'])
626 entries = ie_result['entries'][playliststart:playlistend]
627 n_entries = len(entries)
628 self.to_screen(
629 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
630 (ie_result['extractor'], playlist, n_all_entries, n_entries))
631 else:
632 assert isinstance(ie_result['entries'], PagedList)
633 entries = ie_result['entries'].getslice(
634 playliststart, playlistend)
635 n_entries = len(entries)
636 self.to_screen(
637 "[%s] playlist %s: Downloading %d videos" %
638 (ie_result['extractor'], playlist, n_entries))
639
640 for i, entry in enumerate(entries, 1):
641 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
642 extra = {
643 'n_entries': n_entries,
644 'playlist': playlist,
645 'playlist_index': i + playliststart,
646 'extractor': ie_result['extractor'],
647 'webpage_url': ie_result['webpage_url'],
648 'webpage_url_basename': url_basename(ie_result['webpage_url']),
649 'extractor_key': ie_result['extractor_key'],
650 }
651
652 reason = self._match_entry(entry)
653 if reason is not None:
654 self.to_screen('[download] ' + reason)
655 continue
656
657 entry_result = self.process_ie_result(entry,
658 download=download,
659 extra_info=extra)
660 playlist_results.append(entry_result)
661 ie_result['entries'] = playlist_results
662 return ie_result
663 elif result_type == 'compat_list':
664 def _fixup(r):
665 self.add_extra_info(r,
666 {
667 'extractor': ie_result['extractor'],
668 'webpage_url': ie_result['webpage_url'],
669 'webpage_url_basename': url_basename(ie_result['webpage_url']),
670 'extractor_key': ie_result['extractor_key'],
671 })
672 return r
673 ie_result['entries'] = [
674 self.process_ie_result(_fixup(r), download, extra_info)
675 for r in ie_result['entries']
676 ]
677 return ie_result
678 else:
679 raise Exception('Invalid result type: %s' % result_type)
680
681 def select_format(self, format_spec, available_formats):
682 if format_spec == 'best' or format_spec is None:
683 return available_formats[-1]
684 elif format_spec == 'worst':
685 return available_formats[0]
686 elif format_spec == 'bestaudio':
687 audio_formats = [
688 f for f in available_formats
689 if f.get('vcodec') == 'none']
690 if audio_formats:
691 return audio_formats[-1]
692 elif format_spec == 'worstaudio':
693 audio_formats = [
694 f for f in available_formats
695 if f.get('vcodec') == 'none']
696 if audio_formats:
697 return audio_formats[0]
698 elif format_spec == 'bestvideo':
699 video_formats = [
700 f for f in available_formats
701 if f.get('acodec') == 'none']
702 if video_formats:
703 return video_formats[-1]
704 elif format_spec == 'worstvideo':
705 video_formats = [
706 f for f in available_formats
707 if f.get('acodec') == 'none']
708 if video_formats:
709 return video_formats[0]
710 else:
711 extensions = ['mp4', 'flv', 'webm', '3gp']
712 if format_spec in extensions:
713 filter_f = lambda f: f['ext'] == format_spec
714 else:
715 filter_f = lambda f: f['format_id'] == format_spec
716 matches = list(filter(filter_f, available_formats))
717 if matches:
718 return matches[-1]
719 return None
720
721 def process_video_result(self, info_dict, download=True):
722 assert info_dict.get('_type', 'video') == 'video'
723
724 if 'id' not in info_dict:
725 raise ExtractorError('Missing "id" field in extractor result')
726 if 'title' not in info_dict:
727 raise ExtractorError('Missing "title" field in extractor result')
728
729 if 'playlist' not in info_dict:
730 # It isn't part of a playlist
731 info_dict['playlist'] = None
732 info_dict['playlist_index'] = None
733
734 thumbnails = info_dict.get('thumbnails')
735 if thumbnails:
736 thumbnails.sort(key=lambda t: (
737 t.get('width'), t.get('height'), t.get('url')))
738 for t in thumbnails:
739 if 'width' in t and 'height' in t:
740 t['resolution'] = '%dx%d' % (t['width'], t['height'])
741
742 if thumbnails and 'thumbnail' not in info_dict:
743 info_dict['thumbnail'] = thumbnails[-1]['url']
744
745 if 'display_id' not in info_dict and 'id' in info_dict:
746 info_dict['display_id'] = info_dict['id']
747
748 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
749 upload_date = datetime.datetime.utcfromtimestamp(
750 info_dict['timestamp'])
751 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
752
753 # This extractors handle format selection themselves
754 if info_dict['extractor'] in ['Youku']:
755 if download:
756 self.process_info(info_dict)
757 return info_dict
758
759 # We now pick which formats have to be downloaded
760 if info_dict.get('formats') is None:
761 # There's only one format available
762 formats = [info_dict]
763 else:
764 formats = info_dict['formats']
765
766 if not formats:
767 raise ExtractorError('No video formats found!')
768
769 # We check that all the formats have the format and format_id fields
770 for i, format in enumerate(formats):
771 if 'url' not in format:
772 raise ExtractorError('Missing "url" key in result (index %d)' % i)
773
774 if format.get('format_id') is None:
775 format['format_id'] = compat_str(i)
776 if format.get('format') is None:
777 format['format'] = '{id} - {res}{note}'.format(
778 id=format['format_id'],
779 res=self.format_resolution(format),
780 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
781 )
782 # Automatically determine file extension if missing
783 if 'ext' not in format:
784 format['ext'] = determine_ext(format['url']).lower()
785
786 format_limit = self.params.get('format_limit', None)
787 if format_limit:
788 formats = list(takewhile_inclusive(
789 lambda f: f['format_id'] != format_limit, formats
790 ))
791
792 # TODO Central sorting goes here
793
794 if formats[0] is not info_dict:
795 # only set the 'formats' fields if the original info_dict list them
796 # otherwise we end up with a circular reference, the first (and unique)
797 # element in the 'formats' field in info_dict is info_dict itself,
798 # wich can't be exported to json
799 info_dict['formats'] = formats
800 if self.params.get('listformats', None):
801 self.list_formats(info_dict)
802 return
803
804 req_format = self.params.get('format')
805 if req_format is None:
806 req_format = 'best'
807 formats_to_download = []
808 # The -1 is for supporting YoutubeIE
809 if req_format in ('-1', 'all'):
810 formats_to_download = formats
811 else:
812 # We can accept formats requested in the format: 34/5/best, we pick
813 # the first that is available, starting from left
814 req_formats = req_format.split('/')
815 for rf in req_formats:
816 if re.match(r'.+?\+.+?', rf) is not None:
817 # Two formats have been requested like '137+139'
818 format_1, format_2 = rf.split('+')
819 formats_info = (self.select_format(format_1, formats),
820 self.select_format(format_2, formats))
821 if all(formats_info):
822 selected_format = {
823 'requested_formats': formats_info,
824 'format': rf,
825 'ext': formats_info[0]['ext'],
826 }
827 else:
828 selected_format = None
829 else:
830 selected_format = self.select_format(rf, formats)
831 if selected_format is not None:
832 formats_to_download = [selected_format]
833 break
834 if not formats_to_download:
835 raise ExtractorError('requested format not available',
836 expected=True)
837
838 if download:
839 if len(formats_to_download) > 1:
840 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
841 for format in formats_to_download:
842 new_info = dict(info_dict)
843 new_info.update(format)
844 self.process_info(new_info)
845 # We update the info dict with the best quality format (backwards compatibility)
846 info_dict.update(formats_to_download[-1])
847 return info_dict
848
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles the forced metadata printings, writes the requested side
        files (description, annotations, subtitles, info JSON, thumbnail),
        downloads the media itself (a single file, or several formats to be
        merged) and runs the postprocessors, honouring the relevant options
        in self.params.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Enforce the --max-downloads limit before doing any work
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Remember the untruncated title; shorten overlong ones so the
        # generated filename stays manageable
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            # Fall back to the extension when no format description exists
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            # Entry was filtered out (title match / date range / archive ...)
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Make sure the destination directory exists
        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (KeyError, TypeError):
                    # Missing or non-string description: warn, don't abort
                    self.report_warning('There\'s no description to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # Missing or non-string annotations: warn, don't abort
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(info_dict, encodeFilename(infofn))
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                else:
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    try:
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # A failed thumbnail must not abort the video download
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    def dl(name, info):
                        # Pick the right downloader (HTTP/RTMP/HLS/...) and run it
                        fd = get_suitable_downloader(info)(self, self.params)
                        for ph in self._progress_hooks:
                            fd.add_progress_hook(ph)
                        if self.params.get('verbose'):
                            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                        return fd.download(name, info)
                    if info_dict.get('requested_formats') is not None:
                        # Multiple formats requested (e.g. '137+139'): download
                        # each one to its own file, then merge afterwards
                        downloaded = []
                        success = True
                        merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                        if not merger._get_executable():
                            postprocessors = []
                            self.report_warning('You have requested multiple '
                                                'formats but ffmpeg or avconv are not installed.'
                                                ' The formats won\'t be merged')
                        else:
                            postprocessors = [merger]
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            # Per-format temp name, e.g. "video.f137.mp4"
                            fname = prepend_extension(fname, 'f%s' % f['format_id'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                    else:
                        # Just a single file
                        success = dl(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error('unable to download video data: %s' % str(err))
                    return
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return

        self.record_download_archive(info_dict)
1054
1055 def download(self, url_list):
1056 """Download a given list of URLs."""
1057 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1058 if (len(url_list) > 1 and
1059 '%' not in outtmpl
1060 and self.params.get('max_downloads') != 1):
1061 raise SameFileError(outtmpl)
1062
1063 for url in url_list:
1064 try:
1065 #It also downloads the videos
1066 self.extract_info(url)
1067 except UnavailableVideoError:
1068 self.report_error('unable to download video')
1069 except MaxDownloadsReached:
1070 self.to_screen('[info] Maximum number of downloaded files reached.')
1071 raise
1072
1073 return self._download_retcode
1074
1075 def download_with_info_file(self, info_filename):
1076 with io.open(info_filename, 'r', encoding='utf-8') as f:
1077 info = json.load(f)
1078 try:
1079 self.process_ie_result(info, download=True)
1080 except DownloadError:
1081 webpage_url = info.get('webpage_url')
1082 if webpage_url is not None:
1083 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1084 return self.download([webpage_url])
1085 else:
1086 raise
1087 return self._download_retcode
1088
1089 def post_process(self, filename, ie_info):
1090 """Run all the postprocessors on the given file."""
1091 info = dict(ie_info)
1092 info['filepath'] = filename
1093 keep_video = None
1094 pps_chain = []
1095 if ie_info.get('__postprocessors') is not None:
1096 pps_chain.extend(ie_info['__postprocessors'])
1097 pps_chain.extend(self._pps)
1098 for pp in pps_chain:
1099 try:
1100 keep_video_wish, new_info = pp.run(info)
1101 if keep_video_wish is not None:
1102 if keep_video_wish:
1103 keep_video = keep_video_wish
1104 elif keep_video is None:
1105 # No clear decision yet, let IE decide
1106 keep_video = keep_video_wish
1107 except PostProcessingError as e:
1108 self.report_error(e.msg)
1109 if keep_video is False and not self.params.get('keepvideo', False):
1110 try:
1111 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1112 os.remove(encodeFilename(filename))
1113 except (IOError, OSError):
1114 self.report_warning('Unable to remove downloaded video file')
1115
1116 def _make_archive_id(self, info_dict):
1117 # Future-proof against any change in case
1118 # and backwards compatibility with prior versions
1119 extractor = info_dict.get('extractor_key')
1120 if extractor is None:
1121 if 'id' in info_dict:
1122 extractor = info_dict.get('ie_key') # key in a playlist
1123 if extractor is None:
1124 return None # Incomplete video information
1125 return extractor.lower() + ' ' + info_dict['id']
1126
1127 def in_download_archive(self, info_dict):
1128 fn = self.params.get('download_archive')
1129 if fn is None:
1130 return False
1131
1132 vid_id = self._make_archive_id(info_dict)
1133 if vid_id is None:
1134 return False # Incomplete video information
1135
1136 try:
1137 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1138 for line in archive_file:
1139 if line.strip() == vid_id:
1140 return True
1141 except IOError as ioe:
1142 if ioe.errno != errno.ENOENT:
1143 raise
1144 return False
1145
1146 def record_download_archive(self, info_dict):
1147 fn = self.params.get('download_archive')
1148 if fn is None:
1149 return
1150 vid_id = self._make_archive_id(info_dict)
1151 assert vid_id
1152 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1153 archive_file.write(vid_id + '\n')
1154
1155 @staticmethod
1156 def format_resolution(format, default='unknown'):
1157 if format.get('vcodec') == 'none':
1158 return 'audio only'
1159 if format.get('resolution') is not None:
1160 return format['resolution']
1161 if format.get('height') is not None:
1162 if format.get('width') is not None:
1163 res = '%sx%s' % (format['width'], format['height'])
1164 else:
1165 res = '%sp' % format['height']
1166 elif format.get('width') is not None:
1167 res = '?x%d' % format['width']
1168 else:
1169 res = default
1170 return res
1171
    def _format_note(self, fdict):
        """Build a short descriptive note (codecs, bitrates, size) for a format dict.

        The string is assembled piece by piece; most sections prepend ', '
        only when something was already written, so the exact statement
        order below is significant.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            # f4f/f4m fragments have no standalone downloader here
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' glues the codec name to the bitrate appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec name known but both bitrates are: label the video one
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks the size as an estimate
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1219
1220 def list_formats(self, info_dict):
1221 def line(format, idlen=20):
1222 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1223 format['format_id'],
1224 format['ext'],
1225 self.format_resolution(format),
1226 self._format_note(format),
1227 ))
1228
1229 formats = info_dict.get('formats', [info_dict])
1230 idlen = max(len('format code'),
1231 max(len(f['format_id']) for f in formats))
1232 formats_s = [line(f, idlen) for f in formats]
1233 if len(formats) > 1:
1234 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1235 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1236
1237 header_line = line({
1238 'format_id': 'format code', 'ext': 'extension',
1239 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1240 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1241 (info_dict['id'], header_line, '\n'.join(formats_s)))
1242
1243 def urlopen(self, req):
1244 """ Start an HTTP download """
1245
1246 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1247 # always respected by websites, some tend to give out URLs with non percent-encoded
1248 # non-ASCII characters (see telemb.py, ard.py [#3412])
1249 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1250 # To work around aforementioned issue we will replace request's original URL with
1251 # percent-encoded one
1252 url = req if isinstance(req, compat_str) else req.get_full_url()
1253 url_escaped = escape_url(url)
1254
1255 # Substitute URL if any change after escaping
1256 if url != url_escaped:
1257 if isinstance(req, compat_str):
1258 req = url_escaped
1259 else:
1260 req = compat_urllib_request.Request(
1261 url_escaped, data=req.data, headers=req.headers,
1262 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1263
1264 return self._opener.open(req, timeout=self._socket_timeout)
1265
1266 def print_debug_header(self):
1267 if not self.params.get('verbose'):
1268 return
1269
1270 if type('') is not compat_str:
1271 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1272 self.report_warning(
1273 'Your Python is broken! Update to a newer and supported version')
1274
1275 encoding_str = (
1276 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1277 locale.getpreferredencoding(),
1278 sys.getfilesystemencoding(),
1279 sys.stdout.encoding,
1280 self.get_encoding()))
1281 write_string(encoding_str, encoding=None)
1282
1283 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1284 try:
1285 sp = subprocess.Popen(
1286 ['git', 'rev-parse', '--short', 'HEAD'],
1287 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1288 cwd=os.path.dirname(os.path.abspath(__file__)))
1289 out, err = sp.communicate()
1290 out = out.decode().strip()
1291 if re.match('[0-9a-f]+', out):
1292 self._write_string('[debug] Git HEAD: ' + out + '\n')
1293 except:
1294 try:
1295 sys.exc_clear()
1296 except:
1297 pass
1298 self._write_string('[debug] Python version %s - %s' %
1299 (platform.python_version(), platform_name()) + '\n')
1300
1301 proxy_map = {}
1302 for handler in self._opener.handlers:
1303 if hasattr(handler, 'proxies'):
1304 proxy_map.update(handler.proxies)
1305 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1306
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP traffic.

        Reads the 'socket_timeout', 'cookiefile', 'proxy',
        'nocheckcertificate' and 'debug_printtraffic' options and stores
        the results in self._socket_timeout, self.cookiejar and
        self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when readable; a missing file is created on save
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicit empty --proxy disables proxying entirely
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1347
1348 def encode(self, s):
1349 if isinstance(s, bytes):
1350 return s # Already encoded
1351
1352 try:
1353 return s.encode(self.get_encoding())
1354 except UnicodeEncodeError as err:
1355 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1356 raise
1357
1358 def get_encoding(self):
1359 encoding = self.params.get('encoding')
1360 if encoding is None:
1361 encoding = preferredencoding()
1362 return encoding