]> jfr.im git - yt-dlp.git/blob - youtube_dlc/YoutubeDL.py
97e4f451f1e74bc8b7b1f9c95023adf5dc6e945a
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
57 DownloadError,
58 encode_compat_str,
59 encodeFilename,
60 error_to_compat_str,
61 expand_path,
62 ExtractorError,
63 format_bytes,
64 formatSeconds,
65 GeoRestrictedError,
66 int_or_none,
67 iri_to_uri,
68 ISO3166Utils,
69 locked_file,
70 make_HTTPS_handler,
71 MaxDownloadsReached,
72 orderedSet,
73 PagedList,
74 parse_filesize,
75 PerRequestProxyHandler,
76 platform_name,
77 PostProcessingError,
78 preferredencoding,
79 prepend_extension,
80 register_socks_protocols,
81 render_table,
82 replace_extension,
83 SameFileError,
84 sanitize_filename,
85 sanitize_path,
86 sanitize_url,
87 sanitized_Request,
88 std_headers,
89 str_or_none,
90 subtitles_filename,
91 to_high_limit_path,
92 UnavailableVideoError,
93 url_basename,
94 version_tuple,
95 write_json_file,
96 write_string,
97 YoutubeDLCookieJar,
98 YoutubeDLCookieProcessor,
99 YoutubeDLHandler,
100 YoutubeDLRedirectHandler,
101 )
102 from .cache import Cache
103 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
104 from .extractor.openload import PhantomJSwrapper
105 from .downloader import get_suitable_downloader
106 from .downloader.rtmp import rtmpdump_version
107 from .postprocessor import (
108 FFmpegFixupM3u8PP,
109 FFmpegFixupM4aPP,
110 FFmpegFixupStretchedPP,
111 FFmpegMergerPP,
112 FFmpegPostProcessor,
113 FFmpegSubtitlesConvertorPP,
114 get_postprocessor,
115 )
116 from .version import __version__
117
118 if compat_os_name == 'nt':
119 import ctypes
120
121
122 class YoutubeDL(object):
123 """YoutubeDL class.
124
125 YoutubeDL objects are the ones responsible of downloading the
126 actual video file and writing it to disk if the user has requested
127 it, among some other tasks. In most cases there should be one per
128 program. As, given a video URL, the downloader doesn't know how to
129 extract all the needed information, task that InfoExtractors do, it
130 has to pass the URL to one of them.
131
132 For this, YoutubeDL objects have a method that allows
133 InfoExtractors to be registered in a given order. When it is passed
134 a URL, the YoutubeDL object handles it to the first InfoExtractor it
135 finds that reports being able to handle it. The InfoExtractor extracts
136 all the information about the video or videos the URL refers to, and
137 YoutubeDL process the extracted information, possibly using a File
138 Downloader to download the video.
139
140 YoutubeDL objects accept a lot of parameters. In order not to saturate
141 the object constructor with arguments, it receives a dictionary of
142 options instead. These options are available through the params
143 attribute for the InfoExtractors to use. The YoutubeDL also
144 registers itself as the downloader in charge for the InfoExtractors
145 that are added to it, so this is a "mutual registration".
146
147 Available options:
148
149 username: Username for authentication purposes.
150 password: Password for authentication purposes.
151 videopassword: Password for accessing a video.
152 ap_mso: Adobe Pass multiple-system operator identifier.
153 ap_username: Multiple-system operator account username.
154 ap_password: Multiple-system operator account password.
155 usenetrc: Use netrc for authentication instead.
156 verbose: Print additional info to stdout.
157 quiet: Do not print messages to stdout.
158 no_warnings: Do not print out anything for warnings.
159 forceurl: Force printing final URL.
160 forcetitle: Force printing title.
161 forceid: Force printing ID.
162 forcethumbnail: Force printing thumbnail URL.
163 forcedescription: Force printing description.
164 forcefilename: Force printing final filename.
165 forceduration: Force printing duration.
166 forcejson: Force printing info_dict as JSON.
167 dump_single_json: Force printing the info_dict of the whole playlist
168 (or video) as a single JSON line.
169 simulate: Do not download the video files.
170 format: Video format code. see "FORMAT SELECTION" for more details.
171 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
172 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
173 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
174 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
175 outtmpl: Template for output names.
176 restrictfilenames: Do not allow "&" and spaces in file names.
177 trim_file_name: Limit length of filename (extension excluded).
178 ignoreerrors: Do not stop on download errors.
179 force_generic_extractor: Force downloader to use the generic extractor
180 nooverwrites: Prevent overwriting files.
181 playliststart: Playlist item to start at.
182 playlistend: Playlist item to end at.
183 playlist_items: Specific indices of playlist to download.
184 playlistreverse: Download playlist items in reverse order.
185 playlistrandom: Download playlist items in random order.
186 matchtitle: Download only matching titles.
187 rejecttitle: Reject downloads for matching titles.
188 logger: Log messages to a logging.Logger instance.
189 logtostderr: Log messages to stderr instead of stdout.
190 writedescription: Write the video description to a .description file
191 writeinfojson: Write the video description to a .info.json file
192 writeannotations: Write the video annotations to a .annotations.xml file
193 writethumbnail: Write the thumbnail image to a file
194 write_all_thumbnails: Write all thumbnail formats to files
195 writelink: Write an internet shortcut file, depending on the
196 current platform (.url/.webloc/.desktop)
197 writeurllink: Write a Windows internet shortcut file (.url)
198 writewebloclink: Write a macOS internet shortcut file (.webloc)
199 writedesktoplink: Write a Linux internet shortcut file (.desktop)
200 writesubtitles: Write the video subtitles to a file
201 writeautomaticsub: Write the automatically generated subtitles to a file
202 allsubtitles: Downloads all the subtitles of the video
203 (requires writesubtitles or writeautomaticsub)
204 listsubtitles: Lists all available subtitles for the video
205 subtitlesformat: The format code for subtitles
206 subtitleslangs: List of languages of the subtitles to download
207 keepvideo: Keep the video file after post-processing
208 daterange: A DateRange object, download only if the upload_date is in the range.
209 skip_download: Skip the actual download of the video file
210 cachedir: Location of the cache files in the filesystem.
211 False to disable filesystem cache.
212 noplaylist: Download single video instead of a playlist if in doubt.
213 age_limit: An integer representing the user's age in years.
214 Unsuitable videos for the given age are skipped.
215 min_views: An integer representing the minimum view count the video
216 must have in order to not be skipped.
217 Videos without view count information are always
218 downloaded. None for no limit.
219 max_views: An integer representing the maximum view count.
220 Videos that are more popular than that are not
221 downloaded.
222 Videos without view count information are always
223 downloaded. None for no limit.
224 download_archive: File name of a file where all downloads are recorded.
225 Videos already present in the file are not downloaded
226 again.
227 break_on_existing: Stop the download process after attempting to download a file that's
228 in the archive.
229 cookiefile: File name where cookies should be read from and dumped to.
230 nocheckcertificate:Do not verify SSL certificates
231 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
232 At the moment, this is only supported by YouTube.
233 proxy: URL of the proxy server to use
234 geo_verification_proxy: URL of the proxy to use for IP address verification
235 on geo-restricted sites.
236 socket_timeout: Time to wait for unresponsive hosts, in seconds
237 bidi_workaround: Work around buggy terminals without bidirectional text
238 support, using fridibi
239 debug_printtraffic:Print out sent and received HTTP traffic
240 include_ads: Download ads as well
241 default_search: Prepend this string if an input url is not valid.
242 'auto' for elaborate guessing
243 encoding: Use this encoding instead of the system-specified.
244 extract_flat: Do not resolve URLs, return the immediate result.
245 Pass in 'in_playlist' to only show this behavior for
246 playlist items.
247 postprocessors: A list of dictionaries, each with an entry
248 * key: The name of the postprocessor. See
249 youtube_dlc/postprocessor/__init__.py for a list.
250 as well as any further keyword arguments for the
251 postprocessor.
252 progress_hooks: A list of functions that get called on download
253 progress, with a dictionary with the entries
254 * status: One of "downloading", "error", or "finished".
255 Check this first and ignore unknown values.
256
257 If status is one of "downloading", or "finished", the
258 following properties may also be present:
259 * filename: The final filename (always present)
260 * tmpfilename: The filename we're currently writing to
261 * downloaded_bytes: Bytes on disk
262 * total_bytes: Size of the whole file, None if unknown
263 * total_bytes_estimate: Guess of the eventual file size,
264 None if unavailable.
265 * elapsed: The number of seconds since download started.
266 * eta: The estimated time in seconds, None if unknown
267 * speed: The download speed in bytes/second, None if
268 unknown
269 * fragment_index: The counter of the currently
270 downloaded video fragment.
271 * fragment_count: The number of fragments (= individual
272 files that will be merged)
273
274 Progress hooks are guaranteed to be called at least once
275 (with status "finished") if the download is successful.
276 merge_output_format: Extension to use when merging formats.
277 fixup: Automatically correct known faults of the file.
278 One of:
279 - "never": do nothing
280 - "warn": only emit a warning
281 - "detect_or_warn": check whether we can do anything
282 about it, warn otherwise (default)
283 source_address: Client-side IP address to bind to.
284 call_home: Boolean, true iff we are allowed to contact the
285 youtube-dlc servers for debugging.
286 sleep_interval: Number of seconds to sleep before each download when
287 used alone or a lower bound of a range for randomized
288 sleep before each download (minimum possible number
289 of seconds to sleep) when used along with
290 max_sleep_interval.
291 max_sleep_interval:Upper bound of a range for randomized sleep before each
292 download (maximum possible number of seconds to sleep).
293 Must only be used along with sleep_interval.
294 Actual sleep time will be a random float from range
295 [sleep_interval; max_sleep_interval].
296 listformats: Print an overview of available video formats and exit.
297 list_thumbnails: Print a table of all thumbnails and exit.
298 match_filter: A function that gets called with the info_dict of
299 every video.
300 If it returns a message, the video is ignored.
301 If it returns None, the video is downloaded.
302 match_filter_func in utils.py is one example for this.
303 no_color: Do not emit color codes in output.
304 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
305 HTTP header
306 geo_bypass_country:
307 Two-letter ISO 3166-2 country code that will be used for
308 explicit geographic restriction bypassing via faking
309 X-Forwarded-For HTTP header
310 geo_bypass_ip_block:
311 IP range in CIDR notation that will be used similarly to
312 geo_bypass_country
313
314 The following options determine which downloader is picked:
315 external_downloader: Executable of the external downloader to call.
316 None or unset for standard (built-in) downloader.
317 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
318 if True, otherwise use ffmpeg/avconv if False, otherwise
319 use downloader suggested by extractor if None.
320
321 The following parameters are not used by YoutubeDL itself, they are used by
322 the downloader (see youtube_dlc/downloader/common.py):
323 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
324 noresizebuffer, retries, continuedl, noprogress, consoletitle,
325 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
326 http_chunk_size.
327
328 The following options are used by the post processors:
329 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
330 otherwise prefer ffmpeg.
331 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
332 to the binary or its containing directory.
333 postprocessor_args: A list of additional command-line arguments for the
334 postprocessor.
335
336 The following options are used by the Youtube extractor:
337 youtube_include_dash_manifest: If True (default), DASH manifests and related
338 data will be downloaded and processed by extractor.
339 You can reduce network I/O by disabling it if you don't
340 care about DASH.
341 """
342
    # Metadata fields treated as numeric during output-template substitution.
    # prepare_filename() leaves values of these fields unsanitized (numbers
    # need no filename escaping) and rewrites %(field)<spec>d-style template
    # references to %(field)s when the field is missing, so the 'NA'
    # placeholder string can be substituted without a TypeError.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; every one of these is shadowed by an
    # instance attribute assigned in __init__.
    params = None               # dict of user options (see class docstring)
    _ies = []                   # registered InfoExtractors, in priority order
    _pps = []                   # registered PostProcessors, in chain order
    _download_retcode = None    # exit code accumulated across downloads
    _num_downloads = None       # number of files downloaded this session
    _screen_file = None         # stream used for normal (non-error) output
360
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params: dict of options (see the class docstring); defaults to {}.
        auto_init: when True, print the debug header and register the
        default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route normal output to stderr when logtostderr is set (bool
        # indexes the two-element list as 0/1).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        # NOTE(review): the string below is a free-standing expression, not a
        # docstring for the function that follows (it precedes the def).
        """Preload the archive, if any is specified"""
        # Local helper: load previously-downloaded IDs from the archive file
        # into self.archive. Defined with an explicit `self` parameter and
        # called as preload_download_archive(self) below.
        def preload_download_archive(self):
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is re-raised.
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        # Local helper: warn about a deprecated option; returns True when
        # the deprecated option was actually supplied.
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            # Printed even when no archive is configured (value will be None).
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Spawn bidiv (or fribidi as a fallback) connected to a pty;
                # _bidi_workaround() pipes messages through it.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        # Build the urllib opener chain (proxies, cookies, TLS settings).
        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors; 'key' selects the class,
        # remaining entries become constructor keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
475
476 def warn_if_short_id(self, argv):
477 # short YouTube ID starting with dash?
478 idxs = [
479 i for i, a in enumerate(argv)
480 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
481 if idxs:
482 correct_argv = (
483 ['youtube-dlc']
484 + [a for i, a in enumerate(argv) if i not in idxs]
485 + ['--'] + [argv[i] for i in idxs]
486 )
487 self.report_warning(
488 'Long argument string detected. '
489 'Use -- to separate parameters and URLs, like this:\n%s\n' %
490 args_to_str(correct_argv))
491
492 def add_info_extractor(self, ie):
493 """Add an InfoExtractor object to the end of the list."""
494 self._ies.append(ie)
495 if not isinstance(ie, type):
496 self._ies_instances[ie.ie_key()] = ie
497 ie.set_downloader(self)
498
499 def get_info_extractor(self, ie_key):
500 """
501 Get an instance of an IE with name ie_key, it will try to get one from
502 the _ies list, if there's no instance it will create a new one and add
503 it to the extractor list.
504 """
505 ie = self._ies_instances.get(ie_key)
506 if ie is None:
507 ie = get_info_extractor(ie_key)()
508 self.add_info_extractor(ie)
509 return ie
510
511 def add_default_info_extractors(self):
512 """
513 Add the InfoExtractors returned by gen_extractors to the end of the list
514 """
515 for ie in gen_extractor_classes():
516 self.add_info_extractor(ie)
517
518 def add_post_processor(self, pp):
519 """Add a PostProcessor object to the end of the chain."""
520 self._pps.append(pp)
521 pp.set_downloader(self)
522
523 def add_progress_hook(self, ph):
524 """Add the progress hook (currently only for the file downloader)"""
525 self._progress_hooks.append(ph)
526
527 def _bidi_workaround(self, message):
528 if not hasattr(self, '_output_channel'):
529 return message
530
531 assert hasattr(self, '_output_process')
532 assert isinstance(message, compat_str)
533 line_count = message.count('\n') + 1
534 self._output_process.stdin.write((message + '\n').encode('utf-8'))
535 self._output_process.stdin.flush()
536 res = ''.join(self._output_channel.readline().decode('utf-8')
537 for _ in range(line_count))
538 return res[:-len('\n')]
539
540 def to_screen(self, message, skip_eol=False):
541 """Print message to stdout if not in quiet mode."""
542 return self.to_stdout(message, skip_eol, check_quiet=True)
543
544 def _write_string(self, s, out=None):
545 write_string(s, out=out, encoding=self.params.get('encoding'))
546
547 def to_stdout(self, message, skip_eol=False, check_quiet=False):
548 """Print message to stdout if not in quiet mode."""
549 if self.params.get('logger'):
550 self.params['logger'].debug(message)
551 elif not check_quiet or not self.params.get('quiet', False):
552 message = self._bidi_workaround(message)
553 terminator = ['\n', ''][skip_eol]
554 output = message + terminator
555
556 self._write_string(output, self._screen_file)
557
558 def to_stderr(self, message):
559 """Print message to stderr."""
560 assert isinstance(message, compat_str)
561 if self.params.get('logger'):
562 self.params['logger'].error(message)
563 else:
564 message = self._bidi_workaround(message)
565 output = message + '\n'
566 self._write_string(output, self._err_file)
567
568 def to_console_title(self, message):
569 if not self.params.get('consoletitle', False):
570 return
571 if compat_os_name == 'nt':
572 if ctypes.windll.kernel32.GetConsoleWindow():
573 # c_wchar_p() might not be necessary if `message` is
574 # already of type unicode()
575 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
576 elif 'TERM' in os.environ:
577 self._write_string('\033]0;%s\007' % message, self._screen_file)
578
579 def save_console_title(self):
580 if not self.params.get('consoletitle', False):
581 return
582 if self.params.get('simulate', False):
583 return
584 if compat_os_name != 'nt' and 'TERM' in os.environ:
585 # Save the title on stack
586 self._write_string('\033[22;0t', self._screen_file)
587
588 def restore_console_title(self):
589 if not self.params.get('consoletitle', False):
590 return
591 if self.params.get('simulate', False):
592 return
593 if compat_os_name != 'nt' and 'TERM' in os.environ:
594 # Restore the title from stack
595 self._write_string('\033[23;0t', self._screen_file)
596
597 def __enter__(self):
598 self.save_console_title()
599 return self
600
601 def __exit__(self, *args):
602 self.restore_console_title()
603
604 if self.params.get('cookiefile') is not None:
605 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
606
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # A wrapped ExtractorError-style exception carries the
                    # original traceback in its .exc_info attribute; include
                    # it before the current one.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info when available so the
            # DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the process return code instead.
        self._download_retcode = 1
636
637 def report_warning(self, message):
638 '''
639 Print the message to stderr, it will be prefixed with 'WARNING:'
640 If stderr is a tty file the 'WARNING:' will be colored
641 '''
642 if self.params.get('logger') is not None:
643 self.params['logger'].warning(message)
644 else:
645 if self.params.get('no_warnings'):
646 return
647 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
648 _msg_header = '\033[0;33mWARNING:\033[0m'
649 else:
650 _msg_header = 'WARNING:'
651 warning_message = '%s %s' % (_msg_header, message)
652 self.to_stderr(warning_message)
653
654 def report_error(self, message, tb=None):
655 '''
656 Do the same as trouble, but prefixes the message with 'ERROR:', colored
657 in red if stderr is a tty file.
658 '''
659 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
660 _msg_header = '\033[0;31mERROR:\033[0m'
661 else:
662 _msg_header = 'ERROR:'
663 error_message = '%s %s' % (_msg_header, message)
664 self.trouble(error_message, tb)
665
666 def report_file_already_downloaded(self, file_name):
667 """Report file has already been fully downloaded."""
668 try:
669 self.to_screen('[download] %s has already been downloaded' % file_name)
670 except UnicodeEncodeError:
671 self.to_screen('[download] The file has already been downloaded')
672
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the configured output template (outtmpl) with the metadata
        in info_dict, sanitizing values for filesystem use. Returns the
        sanitized path, or None when the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            # Synthesized template fields not present in the metadata.
            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize string values for filename use; numeric values are
            # passed through untouched, container values are dropped.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Missing fields substitute as the literal string 'NA'.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            # trim_file_name is an int limit on the base-name length; keep the
            # extension (and a secondary extension such as '.info') intact.
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
771
772 def _match_entry(self, info_dict, incomplete):
773 """ Returns None if the file should be downloaded """
774
775 video_title = info_dict.get('title', info_dict.get('id', 'video'))
776 if 'title' in info_dict:
777 # This can happen when we're just evaluating the playlist
778 title = info_dict['title']
779 matchtitle = self.params.get('matchtitle', False)
780 if matchtitle:
781 if not re.search(matchtitle, title, re.IGNORECASE):
782 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
783 rejecttitle = self.params.get('rejecttitle', False)
784 if rejecttitle:
785 if re.search(rejecttitle, title, re.IGNORECASE):
786 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
787 date = info_dict.get('upload_date')
788 if date is not None:
789 dateRange = self.params.get('daterange', DateRange())
790 if date not in dateRange:
791 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
792 view_count = info_dict.get('view_count')
793 if view_count is not None:
794 min_views = self.params.get('min_views')
795 if min_views is not None and view_count < min_views:
796 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
797 max_views = self.params.get('max_views')
798 if max_views is not None and view_count > max_views:
799 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
800 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
801 return 'Skipping "%s" because it is age restricted' % video_title
802 if self.in_download_archive(info_dict):
803 return '%s has already been recorded in archive' % video_title
804
805 if not incomplete:
806 match_filter = self.params.get('match_filter')
807 if match_filter is not None:
808 ret = match_filter(info_dict)
809 if ret is not None:
810 return ret
811
812 return None
813
814 @staticmethod
815 def add_extra_info(info_dict, extra_info):
816 '''Set the keys from extra_info in info dict if they are missing'''
817 for key, value in extra_info.items():
818 info_dict.setdefault(key, value)
819
    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.

        ie_key                  -- use only the extractor with this key
        info_dict               -- metadata of an outer result ('id'/'title'
                                   are propagated by __extract_info)
        extra_info              -- a dict containing the extra values to add to each result
        process                 -- if False, return the raw extractor result
                                   without resolving it via process_ie_result
        force_generic_extractor -- force usage of the generic extractor
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # Try each extractor in order; the first suitable one wins.
        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Cheaply derive the video id from the URL alone so the archive
            # check can short-circuit before any network request is made.
            # Not every extractor supports this, hence the broad except.
            try:
                temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break

            return self.__extract_info(url, ie, download, extra_info, process, info_dict)

        # for/else: only reached when no extractor was suitable (the archive
        # short-circuit above breaks out without triggering this).
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
859
860 def __handle_extraction_exceptions(func):
861 def wrapper(self, *args, **kwargs):
862 try:
863 return func(self, *args, **kwargs)
864 except GeoRestrictedError as e:
865 msg = e.msg
866 if e.countries:
867 msg += '\nThis video is available in %s.' % ', '.join(
868 map(ISO3166Utils.short2full, e.countries))
869 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
870 self.report_error(msg)
871 except ExtractorError as e: # An error we somewhat expected
872 self.report_error(compat_str(e), e.format_traceback())
873 except MaxDownloadsReached:
874 raise
875 except Exception as e:
876 if self.params.get('ignoreerrors', False):
877 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
878 else:
879 raise
880 return wrapper
881
882 @__handle_extraction_exceptions
883 def __extract_info(self, url, ie, download, extra_info, process, info_dict):
884 ie_result = ie.extract(url)
885 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
886 return
887 if isinstance(ie_result, list):
888 # Backwards compatibility: old IE result format
889 ie_result = {
890 '_type': 'compat_list',
891 'entries': ie_result,
892 }
893 if info_dict:
894 if info_dict.get('id'):
895 ie_result['id'] = info_dict['id']
896 if info_dict.get('title'):
897 ie_result['title'] = info_dict['title']
898 self.add_default_extra_info(ie_result, ie, url)
899 if process:
900 return self.process_ie_result(ie_result, download, extra_info)
901 else:
902 return ie_result
903
904 def add_default_extra_info(self, ie_result, ie, url):
905 self.add_extra_info(ie_result, {
906 'extractor': ie.IE_NAME,
907 'webpage_url': url,
908 'webpage_url_basename': url_basename(url),
909 'extractor_key': ie.ie_key(),
910 })
911
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' goes straight to
        process_video_result; 'url'/'url_transparent' are re-extracted
        (recursively); 'playlist'/'multi_video' iterate their entries; and
        the legacy 'compat_list' is processed entry by entry.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist, playlist members are not resolved further.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download, info_dict=ie_result,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            # Non-None outer metadata wins over the inner (embedded) result,
            # except for the identity/bookkeeping fields listed below.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the CLI; convert to 0-based.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                # Expand '1-3,7' into the 1-based indices [1, 2, 3, 7].
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

            ie_entries = ie_result['entries']

            # Select entries by their 1-based indices, silently dropping
            # indices that fall outside the list.
            def make_playlistitems_entries(list_ie_entries):
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            # The entries may be a concrete list, a lazily-fetched PagedList,
            # or an arbitrary iterable (e.g. a generator).
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    # Materialize only up to the largest requested index.
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist context propagated into each entry's result.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    # NOTE(review): this suffix must agree with the exact
                    # message _match_entry produces for archived entries —
                    # verify they match, otherwise --break-on-existing never
                    # triggers.  Also uses print() instead of self.to_screen;
                    # confirm that is intentional.
                    if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
                        print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                        break
                    else:
                        self.to_screen('[download] ' + reason)
                        continue

                entry_result = self.__process_iterable_entry(entry, download, extra)
                # TODO: skip failed (empty) entries?
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
1110
1111 @__handle_extraction_exceptions
1112 def __process_iterable_entry(self, entry, download, extra_info):
1113 return self.process_ie_result(
1114 entry, download=download, extra_info=extra_info)
1115
1116 def _build_format_filter(self, filter_spec):
1117 " Returns a function to filter the formats according to the filter_spec "
1118
1119 OPERATORS = {
1120 '<': operator.lt,
1121 '<=': operator.le,
1122 '>': operator.gt,
1123 '>=': operator.ge,
1124 '=': operator.eq,
1125 '!=': operator.ne,
1126 }
1127 operator_rex = re.compile(r'''(?x)\s*
1128 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1129 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1130 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1131 $
1132 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1133 m = operator_rex.search(filter_spec)
1134 if m:
1135 try:
1136 comparison_value = int(m.group('value'))
1137 except ValueError:
1138 comparison_value = parse_filesize(m.group('value'))
1139 if comparison_value is None:
1140 comparison_value = parse_filesize(m.group('value') + 'B')
1141 if comparison_value is None:
1142 raise ValueError(
1143 'Invalid value %r in format specification %r' % (
1144 m.group('value'), filter_spec))
1145 op = OPERATORS[m.group('op')]
1146
1147 if not m:
1148 STR_OPERATORS = {
1149 '=': operator.eq,
1150 '^=': lambda attr, value: attr.startswith(value),
1151 '$=': lambda attr, value: attr.endswith(value),
1152 '*=': lambda attr, value: value in attr,
1153 }
1154 str_operator_rex = re.compile(r'''(?x)
1155 \s*(?P<key>[a-zA-Z0-9._-]+)
1156 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1157 \s*(?P<value>[a-zA-Z0-9._-]+)
1158 \s*$
1159 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1160 m = str_operator_rex.search(filter_spec)
1161 if m:
1162 comparison_value = m.group('value')
1163 str_op = STR_OPERATORS[m.group('op')]
1164 if m.group('negation'):
1165 op = lambda attr, value: not str_op(attr, value)
1166 else:
1167 op = str_op
1168
1169 if not m:
1170 raise ValueError('Invalid filter specification %r' % filter_spec)
1171
1172 def _filter(f):
1173 actual_value = f.get(m.group('key'))
1174 if actual_value is None:
1175 return m.group('none_inclusive')
1176 return op(actual_value, comparison_value)
1177 return _filter
1178
1179 def _default_format_spec(self, info_dict, download=True):
1180
1181 def can_merge():
1182 merger = FFmpegMergerPP(self)
1183 return merger.available and merger.can_merge()
1184
1185 def prefer_best():
1186 if self.params.get('simulate', False):
1187 return False
1188 if not download:
1189 return False
1190 if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1191 return True
1192 if info_dict.get('is_live'):
1193 return True
1194 if not can_merge():
1195 return True
1196 return False
1197
1198 req_format_list = ['bestvideo+bestaudio', 'best']
1199 if prefer_best():
1200 req_format_list.reverse()
1201 return '/'.join(req_format_list)
1202
    def build_format_selector(self, format_spec):
        """Compile a --format specification string into a selector function.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree of
        FormatSelector nodes (SINGLE atoms, '/' PICKFIRST alternatives,
        '+' MERGE pairs, '(...)' GROUPs and '[...]' filters) and then compiled
        into a function mapping a context dict ({'formats', 'incomplete_formats'})
        to an iterable of selected formats.
        Raises SyntaxError for a malformed spec.
        """
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node type tags for the parse tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
                                  'video': self.params.get('allow_multiple_video_streams', True)}

        # Collect everything up to the closing ']' as one raw filter string.
        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Fuse adjacent name/number/op tokens into one identifier.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        # Recursive-descent parser over the (pre-processed) token stream.
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        # Compile a parse tree node into a ctx -> formats function.
        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector if selector.selector is not None else 'best'

                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if formats:
                            for f in formats:
                                yield f

                else:
                    format_fallback = False
                    # best/worst (b/w), optionally restricted to video/audio
                    # (v/a) and/or modified with '*'.
                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                    if format_spec_obj is not None:
                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                        not_format_type = 'v' if format_type == 'a' else 'a'
                        format_modified = format_spec_obj.group(3) is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                    if format_type  # bv, ba, wv, wa
                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                    if not format_modified  # b, w
                                    else None)  # b*, w*
                    else:
                        # Otherwise the atom is an extension or a format_id.
                        format_idx = -1
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if matches:
                            yield matches[format_idx]
                        # NOTE(review): format_fallback is only ever a bool in
                        # this function, so the == 'force' comparison looks
                        # unreachable here — verify before relying on it.
                        elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            yield formats[format_idx]

            elif selector.type == MERGE:  # +
                def _merge(formats_pair):
                    format_1, format_2 = formats_pair

                    formats_info = []
                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
                    formats_info.extend(format_2.get('requested_formats', (format_2,)))

                    # Drop surplus audio/video streams when multiple streams
                    # of that kind are not allowed.
                    # NOTE(review): this pops from formats_info while
                    # enumerating it, which skips the element following each
                    # removal — verify whether that is intended.
                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        get_no_more = {"video": False, "audio": False}
                        for (i, fmt_info) in enumerate(formats_info):
                            for aud_vid in ["audio", "video"]:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        formats_info.pop(i)
                                    get_no_more[aud_vid] = True

                    if len(formats_info) == 1:
                        return formats_info[0]

                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                    output_ext = self.params.get('merge_output_format')
                    if not output_ext:
                        if the_only_video:
                            output_ext = the_only_video['ext']
                        elif the_only_audio and not video_fmts:
                            output_ext = the_only_audio['ext']
                        else:
                            output_ext = 'mkv'

                    new_dict = {
                        'requested_formats': formats_info,
                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                        'ext': output_ext,
                    }

                    if the_only_video:
                        new_dict.update({
                            'width': the_only_video.get('width'),
                            'height': the_only_video.get('height'),
                            'resolution': the_only_video.get('resolution'),
                            'fps': the_only_video.get('fps'),
                            'vcodec': the_only_video.get('vcodec'),
                            'vbr': the_only_video.get('vbr'),
                            'stretched_ratio': the_only_video.get('stretched_ratio'),
                        })

                    if the_only_audio:
                        new_dict.update({
                            'acodec': the_only_audio.get('acodec'),
                            'abr': the_only_audio.get('abr'),
                        })

                    return new_dict

                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            # Apply the node's '[...]' filters to a deep copy of the context
            # so sibling selectors still see the unfiltered formats.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        # Token stream with one-token lookback, needed by the parser above.
        class TokenIterator(object):
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1490
1491 def _calc_headers(self, info_dict):
1492 res = std_headers.copy()
1493
1494 add_headers = info_dict.get('http_headers')
1495 if add_headers:
1496 res.update(add_headers)
1497
1498 cookies = self._calc_cookies(info_dict)
1499 if cookies:
1500 res['Cookie'] = cookies
1501
1502 if 'X-Forwarded-For' not in res:
1503 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1504 if x_forwarded_for_ip:
1505 res['X-Forwarded-For'] = x_forwarded_for_ip
1506
1507 return res
1508
1509 def _calc_cookies(self, info_dict):
1510 pr = sanitized_Request(info_dict['url'])
1511 self.cookiejar.add_cookie_header(pr)
1512 return pr.get_header('Cookie')
1513
1514 def process_video_result(self, info_dict, download=True):
1515 assert info_dict.get('_type', 'video') == 'video'
1516
1517 if 'id' not in info_dict:
1518 raise ExtractorError('Missing "id" field in extractor result')
1519 if 'title' not in info_dict:
1520 raise ExtractorError('Missing "title" field in extractor result')
1521
1522 def report_force_conversion(field, field_not, conversion):
1523 self.report_warning(
1524 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1525 % (field, field_not, conversion))
1526
1527 def sanitize_string_field(info, string_field):
1528 field = info.get(string_field)
1529 if field is None or isinstance(field, compat_str):
1530 return
1531 report_force_conversion(string_field, 'a string', 'string')
1532 info[string_field] = compat_str(field)
1533
1534 def sanitize_numeric_fields(info):
1535 for numeric_field in self._NUMERIC_FIELDS:
1536 field = info.get(numeric_field)
1537 if field is None or isinstance(field, compat_numeric_types):
1538 continue
1539 report_force_conversion(numeric_field, 'numeric', 'int')
1540 info[numeric_field] = int_or_none(field)
1541
1542 sanitize_string_field(info_dict, 'id')
1543 sanitize_numeric_fields(info_dict)
1544
1545 if 'playlist' not in info_dict:
1546 # It isn't part of a playlist
1547 info_dict['playlist'] = None
1548 info_dict['playlist_index'] = None
1549
1550 thumbnails = info_dict.get('thumbnails')
1551 if thumbnails is None:
1552 thumbnail = info_dict.get('thumbnail')
1553 if thumbnail:
1554 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1555 if thumbnails:
1556 thumbnails.sort(key=lambda t: (
1557 t.get('preference') if t.get('preference') is not None else -1,
1558 t.get('width') if t.get('width') is not None else -1,
1559 t.get('height') if t.get('height') is not None else -1,
1560 t.get('id') if t.get('id') is not None else '', t.get('url')))
1561 for i, t in enumerate(thumbnails):
1562 t['url'] = sanitize_url(t['url'])
1563 if t.get('width') and t.get('height'):
1564 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1565 if t.get('id') is None:
1566 t['id'] = '%d' % i
1567
1568 if self.params.get('list_thumbnails'):
1569 self.list_thumbnails(info_dict)
1570 return
1571
1572 thumbnail = info_dict.get('thumbnail')
1573 if thumbnail:
1574 info_dict['thumbnail'] = sanitize_url(thumbnail)
1575 elif thumbnails:
1576 info_dict['thumbnail'] = thumbnails[-1]['url']
1577
1578 if 'display_id' not in info_dict and 'id' in info_dict:
1579 info_dict['display_id'] = info_dict['id']
1580
1581 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1582 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1583 # see http://bugs.python.org/issue1646728)
1584 try:
1585 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1586 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1587 except (ValueError, OverflowError, OSError):
1588 pass
1589
1590 # Auto generate title fields corresponding to the *_number fields when missing
1591 # in order to always have clean titles. This is very common for TV series.
1592 for field in ('chapter', 'season', 'episode'):
1593 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1594 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1595
1596 for cc_kind in ('subtitles', 'automatic_captions'):
1597 cc = info_dict.get(cc_kind)
1598 if cc:
1599 for _, subtitle in cc.items():
1600 for subtitle_format in subtitle:
1601 if subtitle_format.get('url'):
1602 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1603 if subtitle_format.get('ext') is None:
1604 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1605
1606 automatic_captions = info_dict.get('automatic_captions')
1607 subtitles = info_dict.get('subtitles')
1608
1609 if self.params.get('listsubtitles', False):
1610 if 'automatic_captions' in info_dict:
1611 self.list_subtitles(
1612 info_dict['id'], automatic_captions, 'automatic captions')
1613 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1614 return
1615
1616 info_dict['requested_subtitles'] = self.process_subtitles(
1617 info_dict['id'], subtitles, automatic_captions)
1618
1619 # We now pick which formats have to be downloaded
1620 if info_dict.get('formats') is None:
1621 # There's only one format available
1622 formats = [info_dict]
1623 else:
1624 formats = info_dict['formats']
1625
1626 if not formats:
1627 raise ExtractorError('No video formats found!')
1628
1629 def is_wellformed(f):
1630 url = f.get('url')
1631 if not url:
1632 self.report_warning(
1633 '"url" field is missing or empty - skipping format, '
1634 'there is an error in extractor')
1635 return False
1636 if isinstance(url, bytes):
1637 sanitize_string_field(f, 'url')
1638 return True
1639
1640 # Filter out malformed formats for better extraction robustness
1641 formats = list(filter(is_wellformed, formats))
1642
1643 formats_dict = {}
1644
1645 # We check that all the formats have the format and format_id fields
1646 for i, format in enumerate(formats):
1647 sanitize_string_field(format, 'format_id')
1648 sanitize_numeric_fields(format)
1649 format['url'] = sanitize_url(format['url'])
1650 if not format.get('format_id'):
1651 format['format_id'] = compat_str(i)
1652 else:
1653 # Sanitize format_id from characters used in format selector expression
1654 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1655 format_id = format['format_id']
1656 if format_id not in formats_dict:
1657 formats_dict[format_id] = []
1658 formats_dict[format_id].append(format)
1659
1660 # Make sure all formats have unique format_id
1661 for format_id, ambiguous_formats in formats_dict.items():
1662 if len(ambiguous_formats) > 1:
1663 for i, format in enumerate(ambiguous_formats):
1664 format['format_id'] = '%s-%d' % (format_id, i)
1665
1666 for i, format in enumerate(formats):
1667 if format.get('format') is None:
1668 format['format'] = '{id} - {res}{note}'.format(
1669 id=format['format_id'],
1670 res=self.format_resolution(format),
1671 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1672 )
1673 # Automatically determine file extension if missing
1674 if format.get('ext') is None:
1675 format['ext'] = determine_ext(format['url']).lower()
1676 # Automatically determine protocol if missing (useful for format
1677 # selection purposes)
1678 if format.get('protocol') is None:
1679 format['protocol'] = determine_protocol(format)
1680 # Add HTTP headers, so that external programs can use them from the
1681 # json output
1682 full_format_info = info_dict.copy()
1683 full_format_info.update(format)
1684 format['http_headers'] = self._calc_headers(full_format_info)
1685 # Remove private housekeeping stuff
1686 if '__x_forwarded_for_ip' in info_dict:
1687 del info_dict['__x_forwarded_for_ip']
1688
1689 # TODO Central sorting goes here
1690
1691 if formats[0] is not info_dict:
1692 # only set the 'formats' fields if the original info_dict list them
1693 # otherwise we end up with a circular reference, the first (and unique)
1694 # element in the 'formats' field in info_dict is info_dict itself,
1695 # which can't be exported to json
1696 info_dict['formats'] = formats
1697 if self.params.get('listformats'):
1698 self.list_formats(info_dict)
1699 return
1700
1701 req_format = self.params.get('format')
1702 if req_format is None:
1703 req_format = self._default_format_spec(info_dict, download=download)
1704 if self.params.get('verbose'):
1705 self.to_stdout('[debug] Default format spec: %s' % req_format)
1706
1707 format_selector = self.build_format_selector(req_format)
1708
1709 # While in format selection we may need to have an access to the original
1710 # format set in order to calculate some metrics or do some processing.
1711 # For now we need to be able to guess whether original formats provided
1712 # by extractor are incomplete or not (i.e. whether extractor provides only
1713 # video-only or audio-only formats) for proper formats selection for
1714 # extractors with such incomplete formats (see
1715 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1716 # Since formats may be filtered during format selection and may not match
1717 # the original formats the results may be incorrect. Thus original formats
1718 # or pre-calculated metrics should be passed to format selection routines
1719 # as well.
1720 # We will pass a context object containing all necessary additional data
1721 # instead of just formats.
1722 # This fixes incorrect format selection issue (see
1723 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1724 incomplete_formats = (
1725 # All formats are video-only or
1726 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1727 # all formats are audio-only
1728 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1729
1730 ctx = {
1731 'formats': formats,
1732 'incomplete_formats': incomplete_formats,
1733 }
1734
1735 formats_to_download = list(format_selector(ctx))
1736 if not formats_to_download:
1737 raise ExtractorError('requested format not available',
1738 expected=True)
1739
1740 if download:
1741 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1742 if len(formats_to_download) > 1:
1743 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1744 for format in formats_to_download:
1745 new_info = dict(info_dict)
1746 new_info.update(format)
1747 self.process_info(new_info)
1748 # We update the info dict with the best quality format (backwards compatibility)
1749 info_dict.update(formats_to_download[-1])
1750 return info_dict
1751
1752 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1753 """Select the requested subtitles and their format"""
1754 available_subs = {}
1755 if normal_subtitles and self.params.get('writesubtitles'):
1756 available_subs.update(normal_subtitles)
1757 if automatic_captions and self.params.get('writeautomaticsub'):
1758 for lang, cap_info in automatic_captions.items():
1759 if lang not in available_subs:
1760 available_subs[lang] = cap_info
1761
1762 if (not self.params.get('writesubtitles') and not
1763 self.params.get('writeautomaticsub') or not
1764 available_subs):
1765 return None
1766
1767 if self.params.get('allsubtitles', False):
1768 requested_langs = available_subs.keys()
1769 else:
1770 if self.params.get('subtitleslangs', False):
1771 requested_langs = self.params.get('subtitleslangs')
1772 elif 'en' in available_subs:
1773 requested_langs = ['en']
1774 else:
1775 requested_langs = [list(available_subs.keys())[0]]
1776
1777 formats_query = self.params.get('subtitlesformat', 'best')
1778 formats_preference = formats_query.split('/') if formats_query else []
1779 subs = {}
1780 for lang in requested_langs:
1781 formats = available_subs.get(lang)
1782 if formats is None:
1783 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1784 continue
1785 for ext in formats_preference:
1786 if ext == 'best':
1787 f = formats[-1]
1788 break
1789 matches = list(filter(lambda f: f['ext'] == ext, formats))
1790 if matches:
1791 f = matches[-1]
1792 break
1793 else:
1794 f = formats[-1]
1795 self.report_warning(
1796 'No subtitle format found matching "%s" for language %s, '
1797 'using %s' % (formats_query, lang, f['ext']))
1798 subs[lang] = f
1799 return subs
1800
1801 def __forced_printings(self, info_dict, filename, incomplete):
1802 def print_mandatory(field):
1803 if (self.params.get('force%s' % field, False)
1804 and (not incomplete or info_dict.get(field) is not None)):
1805 self.to_stdout(info_dict[field])
1806
1807 def print_optional(field):
1808 if (self.params.get('force%s' % field, False)
1809 and info_dict.get(field) is not None):
1810 self.to_stdout(info_dict[field])
1811
1812 print_mandatory('title')
1813 print_mandatory('id')
1814 if self.params.get('forceurl', False) and not incomplete:
1815 if info_dict.get('requested_formats') is not None:
1816 for f in info_dict['requested_formats']:
1817 self.to_stdout(f['url'] + f.get('play_path', ''))
1818 else:
1819 # For RTMP URLs, also include the playpath
1820 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1821 print_optional('thumbnail')
1822 print_optional('description')
1823 if self.params.get('forcefilename', False) and filename is not None:
1824 self.to_stdout(filename)
1825 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1826 self.to_stdout(formatSeconds(info_dict['duration']))
1827 print_mandatory('format')
1828 if self.params.get('forcejson', False):
1829 self.to_stdout(json.dumps(info_dict))
1830
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Writes the requested side files (description, annotations,
        subtitles, info JSON, thumbnails, internet shortcuts), downloads
        the media itself (merging multiple requested formats when needed),
        queues fixup postprocessors, runs postprocessing and finally
        records the download in the archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Apply --match-filter / date range / etc.; a non-None reason means skip
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        self.__forced_printings(info_dict, filename, incomplete=False)

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        def ensure_dir_exists(path):
            # Create the parent directory of *path* if needed; False on failure
            try:
                dn = os.path.dirname(path)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                return True
            except (OSError, IOError) as err:
                self.report_error('unable to create directory ' + error_to_compat_str(err))
                return False

        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
            return

        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        def dl(name, info, subtitle=False):
            # Pick a suitable FileDownloader for *info* and run it on *name*
            fd = get_suitable_downloader(info, self.params)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            if self.params.get('verbose'):
                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
            return fd.download(name, info, subtitle)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            # ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    # Inline subtitle data is written directly; otherwise the
                    # subtitle is fetched through a regular downloader.
                    if sub_info.get('data') is not None:
                        try:
                            # Use newline='' to prevent conversion of newline characters
                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                subfile.write(sub_info['data'])
                        except (OSError, IOError):
                            self.report_error('Cannot write subtitles file ' + sub_filename)
                            return
                    else:
                        try:
                            dl(sub_filename, sub_info, subtitle=True)
                            '''
                            if self.params.get('sleep_interval_subtitles', False):
                                dl(sub_filename, sub_info)
                            else:
                                sub_data = ie._request_webpage(
                                    sub_info['url'], info_dict['id'], note=False).read()
                                with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                    subfile.write(sub_data)
                            '''
                        except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                            self.report_warning('Unable to download subtitle for "%s": %s' %
                                                (sub_lang, error_to_compat_str(err)))
                            continue

        if self.params.get('skip_download', False):
            # Even with --skip-download, subtitle conversion may still apply
            if self.params.get('convertsubtitles', False):
                subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
                if subconv.available:
                    info_dict.setdefault('__postprocessors', [])
                    # info_dict['__postprocessors'].append(subconv)
                if os.path.exists(encodeFilename(afilename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'converted' % afilename)
                else:
                    try:
                        self.post_process(filename, info_dict)
                    except (PostProcessingError) as err:
                        self.report_error('postprocessing: %s' % str(err))
                        return

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    # requested_* keys are stripped: volatile and not JSON-safe
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        # Write internet shortcut files
        url_link = webloc_link = desktop_link = False
        if self.params.get('writelink', False):
            # --write-link picks the platform-native shortcut format
            if sys.platform == "darwin":  # macOS.
                webloc_link = True
            elif sys.platform.startswith("linux"):
                desktop_link = True
            else:  # if sys.platform in ['win32', 'cygwin']:
                url_link = True
        if self.params.get('writeurllink', False):
            url_link = True
        if self.params.get('writewebloclink', False):
            webloc_link = True
        if self.params.get('writedesktoplink', False):
            desktop_link = True

        if url_link or webloc_link or desktop_link:
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return
            ascii_url = iri_to_uri(info_dict['webpage_url'])

        def _write_link_file(extension, template, newline, embed_filename):
            # Render one shortcut file from *template*; False aborts process_info
            linkfn = replace_extension(filename, extension, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen('[info] Internet shortcut is already present')
            else:
                try:
                    self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                    with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                        template_vars = {'url': ascii_url}
                        if embed_filename:
                            # .desktop templates need the target name without extension
                            template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                        linkfile.write(template % template_vars)
                except (OSError, IOError):
                    self.report_error('Cannot write internet shortcut ' + linkfn)
                    return False
            return True

        if url_link:
            if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
                return
        if webloc_link:
            if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
                return
        if desktop_link:
            if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
                return

        # Download
        # NOTE(review): must_record_download_archive is assigned but never
        # read in this version — leftover from upstream; confirm before removal.
        must_record_download_archive = False
        if not self.params.get('skip_download', False):
            try:
                if info_dict.get('requested_formats') is not None:
                    # Multiple formats requested: download each part, then merge
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                        # Ensure filename always has a correct extension for successful merge
                        filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('requested_formats') is None
                        and info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('protocol') == 'm3u8_native'
                        or info_dict.get('protocol') == 'm3u8'
                        and self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformed AAC bitstream detected. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
        self.record_download_archive(info_dict)
2192
2193 def download(self, url_list):
2194 """Download a given list of URLs."""
2195 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2196 if (len(url_list) > 1
2197 and outtmpl != '-'
2198 and '%' not in outtmpl
2199 and self.params.get('max_downloads') != 1):
2200 raise SameFileError(outtmpl)
2201
2202 for url in url_list:
2203 try:
2204 # It also downloads the videos
2205 res = self.extract_info(
2206 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2207 except UnavailableVideoError:
2208 self.report_error('unable to download video')
2209 except MaxDownloadsReached:
2210 self.to_screen('[info] Maximum number of downloaded files reached.')
2211 raise
2212 else:
2213 if self.params.get('dump_single_json', False):
2214 self.to_stdout(json.dumps(res))
2215
2216 return self._download_retcode
2217
2218 def download_with_info_file(self, info_filename):
2219 with contextlib.closing(fileinput.FileInput(
2220 [info_filename], mode='r',
2221 openhook=fileinput.hook_encoded('utf-8'))) as f:
2222 # FileInput doesn't have a read method, we can't call json.load
2223 info = self.filter_requested_info(json.loads('\n'.join(f)))
2224 try:
2225 self.process_ie_result(info, download=True)
2226 except DownloadError:
2227 webpage_url = info.get('webpage_url')
2228 if webpage_url is not None:
2229 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2230 return self.download([webpage_url])
2231 else:
2232 raise
2233 return self._download_retcode
2234
2235 @staticmethod
2236 def filter_requested_info(info_dict):
2237 return dict(
2238 (k, v) for k, v in info_dict.items()
2239 if k not in ['requested_formats', 'requested_subtitles'])
2240
2241 def post_process(self, filename, ie_info):
2242 """Run all the postprocessors on the given file."""
2243 info = dict(ie_info)
2244 info['filepath'] = filename
2245 pps_chain = []
2246 if ie_info.get('__postprocessors') is not None:
2247 pps_chain.extend(ie_info['__postprocessors'])
2248 pps_chain.extend(self._pps)
2249 for pp in pps_chain:
2250 files_to_delete = []
2251 try:
2252 files_to_delete, info = pp.run(info)
2253 except PostProcessingError as e:
2254 self.report_error(e.msg)
2255 if files_to_delete and not self.params.get('keepvideo', False):
2256 for old_filename in set(files_to_delete):
2257 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2258 try:
2259 os.remove(encodeFilename(old_filename))
2260 except (IOError, OSError):
2261 self.report_warning('Unable to remove downloaded original file')
2262
2263 def _make_archive_id(self, info_dict):
2264 video_id = info_dict.get('id')
2265 if not video_id:
2266 return
2267 # Future-proof against any change in case
2268 # and backwards compatibility with prior versions
2269 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2270 if extractor is None:
2271 url = str_or_none(info_dict.get('url'))
2272 if not url:
2273 return
2274 # Try to find matching extractor for the URL and take its ie_key
2275 for ie in self._ies:
2276 if ie.suitable(url):
2277 extractor = ie.ie_key()
2278 break
2279 else:
2280 return
2281 return extractor.lower() + ' ' + video_id
2282
2283 def in_download_archive(self, info_dict):
2284 fn = self.params.get('download_archive')
2285 if fn is None:
2286 return False
2287
2288 vid_id = self._make_archive_id(info_dict)
2289 if not vid_id:
2290 return False # Incomplete video information
2291
2292 return vid_id in self.archive
2293
2294 def record_download_archive(self, info_dict):
2295 fn = self.params.get('download_archive')
2296 if fn is None:
2297 return
2298 vid_id = self._make_archive_id(info_dict)
2299 assert vid_id
2300 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2301 archive_file.write(vid_id + '\n')
2302 self.archive.add(vid_id)
2303
2304 @staticmethod
2305 def format_resolution(format, default='unknown'):
2306 if format.get('vcodec') == 'none':
2307 return 'audio only'
2308 if format.get('resolution') is not None:
2309 return format['resolution']
2310 if format.get('height') is not None:
2311 if format.get('width') is not None:
2312 res = '%sx%s' % (format['width'], format['height'])
2313 else:
2314 res = '%sp' % format['height']
2315 elif format.get('width') is not None:
2316 res = '%dx?' % format['width']
2317 else:
2318 res = default
2319 return res
2320
    def _format_note(self, fdict):
        """Build the human-readable 'note' column for one format dict.

        Pieces (language, format note, bitrates, container, codecs, fps,
        sample rate, filesize) are appended in a fixed order; each section
        comma-separates itself from whatever was appended before.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec name with the video bitrate printed below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but codec unknown: label the vbr explicitly
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks an estimated size
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2376
2377 def list_formats(self, info_dict):
2378 formats = info_dict.get('formats', [info_dict])
2379 table = [
2380 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2381 for f in formats
2382 if f.get('preference') is None or f['preference'] >= -1000]
2383 # if len(formats) > 1:
2384 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best*)'
2385
2386 header_line = ['format code', 'extension', 'resolution', 'note']
2387 self.to_screen(
2388 '[info] Available formats for %s:\n%s' %
2389 (info_dict['id'], render_table(header_line, table)))
2390
2391 def list_thumbnails(self, info_dict):
2392 thumbnails = info_dict.get('thumbnails')
2393 if not thumbnails:
2394 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2395 return
2396
2397 self.to_screen(
2398 '[info] Thumbnails for %s:' % info_dict['id'])
2399 self.to_screen(render_table(
2400 ['ID', 'width', 'height', 'URL'],
2401 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2402
2403 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2404 if not subtitles:
2405 self.to_screen('%s has no %s' % (video_id, name))
2406 return
2407 self.to_screen(
2408 'Available %s for %s:' % (name, video_id))
2409 self.to_screen(render_table(
2410 ['Language', 'formats'],
2411 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2412 for lang, formats in subtitles.items()]))
2413
2414 def urlopen(self, req):
2415 """ Start an HTTP download """
2416 if isinstance(req, compat_basestring):
2417 req = sanitized_Request(req)
2418 return self._opener.open(req, timeout=self._socket_timeout)
2419
    def print_debug_header(self):
        """Write the '[debug] ...' diagnostic header (verbose mode only).

        Reports encodings, the youtube-dlc version, the git HEAD when run
        from a checkout, Python/platform details, helper-program versions,
        the proxy map, and — with 'call_home' — the public IP plus an
        update check.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        try:
            # Best effort: report the git commit when running from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the current exception state
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when running under PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy mapping from all opener handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Network requests below run only when the user opted in via call_home
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2495
    def _setup_opener(self):
        """Build the urllib opener used by self.urlopen and store it in
        self._opener (cookies, proxies, HTTPS, data: support, file: disabled).
        Also sets self._socket_timeout and self.cookiejar."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory jar only; nothing is persisted to disk
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # Empty string explicitly disables all proxies
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment (HTTP_PROXY etc.)
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2548
2549 def encode(self, s):
2550 if isinstance(s, bytes):
2551 return s # Already encoded
2552
2553 try:
2554 return s.encode(self.get_encoding())
2555 except UnicodeEncodeError as err:
2556 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2557 raise
2558
2559 def get_encoding(self):
2560 encoding = self.params.get('encoding')
2561 if encoding is None:
2562 encoding = preferredencoding()
2563 return encoding
2564
2565 def _write_thumbnails(self, info_dict, filename):
2566 if self.params.get('writethumbnail', False):
2567 thumbnails = info_dict.get('thumbnails')
2568 if thumbnails:
2569 thumbnails = [thumbnails[-1]]
2570 elif self.params.get('write_all_thumbnails', False):
2571 thumbnails = info_dict.get('thumbnails')
2572 else:
2573 return
2574
2575 if not thumbnails:
2576 # No thumbnails present, so return immediately
2577 return
2578
2579 for t in thumbnails:
2580 thumb_ext = determine_ext(t['url'], 'jpg')
2581 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2582 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2583 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2584
2585 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2586 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2587 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2588 else:
2589 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2590 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2591 try:
2592 uf = self.urlopen(t['url'])
2593 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2594 shutil.copyfileobj(uf, thumbf)
2595 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2596 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2597 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2598 self.report_warning('Unable to download thumbnail "%s": %s' %
2599 (t['url'], error_to_compat_str(err)))