]> jfr.im git - yt-dlp.git/blob - youtube_dlc/YoutubeDL.py
#45 Allow date/time formatting in output template
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
57 DownloadError,
58 encode_compat_str,
59 encodeFilename,
60 error_to_compat_str,
61 ExistingVideoReached,
62 expand_path,
63 ExtractorError,
64 float_or_none,
65 format_bytes,
66 format_field,
67 formatSeconds,
68 GeoRestrictedError,
69 int_or_none,
70 iri_to_uri,
71 ISO3166Utils,
72 locked_file,
73 make_dir,
74 make_HTTPS_handler,
75 MaxDownloadsReached,
76 orderedSet,
77 PagedList,
78 parse_filesize,
79 PerRequestProxyHandler,
80 platform_name,
81 PostProcessingError,
82 preferredencoding,
83 prepend_extension,
84 register_socks_protocols,
85 render_table,
86 replace_extension,
87 RejectedVideoReached,
88 SameFileError,
89 sanitize_filename,
90 sanitize_path,
91 sanitize_url,
92 sanitized_Request,
93 std_headers,
94 str_or_none,
95 strftime_or_none,
96 subtitles_filename,
97 to_high_limit_path,
98 UnavailableVideoError,
99 url_basename,
100 version_tuple,
101 write_json_file,
102 write_string,
103 YoutubeDLCookieJar,
104 YoutubeDLCookieProcessor,
105 YoutubeDLHandler,
106 YoutubeDLRedirectHandler,
107 process_communicate_or_kill,
108 )
109 from .cache import Cache
110 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
111 from .extractor.openload import PhantomJSwrapper
112 from .downloader import get_suitable_downloader
113 from .downloader.rtmp import rtmpdump_version
114 from .postprocessor import (
115 FFmpegFixupM3u8PP,
116 FFmpegFixupM4aPP,
117 FFmpegFixupStretchedPP,
118 FFmpegMergerPP,
119 FFmpegPostProcessor,
120 # FFmpegSubtitlesConvertorPP,
121 get_postprocessor,
122 MoveFilesAfterDownloadPP,
123 )
124 from .version import __version__
125
126 if compat_os_name == 'nt':
127 import ctypes
128
129
130 class YoutubeDL(object):
131 """YoutubeDL class.
132
133 YoutubeDL objects are the ones responsible of downloading the
134 actual video file and writing it to disk if the user has requested
135 it, among some other tasks. In most cases there should be one per
136 program. As, given a video URL, the downloader doesn't know how to
137 extract all the needed information (a task that InfoExtractors handle), it
138 has to pass the URL to one of them.
139
140 For this, YoutubeDL objects have a method that allows
141 InfoExtractors to be registered in a given order. When it is passed
142 a URL, the YoutubeDL object handles it to the first InfoExtractor it
143 finds that reports being able to handle it. The InfoExtractor extracts
144 all the information about the video or videos the URL refers to, and
145 YoutubeDL process the extracted information, possibly using a File
146 Downloader to download the video.
147
148 YoutubeDL objects accept a lot of parameters. In order not to saturate
149 the object constructor with arguments, it receives a dictionary of
150 options instead. These options are available through the params
151 attribute for the InfoExtractors to use. The YoutubeDL also
152 registers itself as the downloader in charge for the InfoExtractors
153 that are added to it, so this is a "mutual registration".
154
155 Available options:
156
157 username: Username for authentication purposes.
158 password: Password for authentication purposes.
159 videopassword: Password for accessing a video.
160 ap_mso: Adobe Pass multiple-system operator identifier.
161 ap_username: Multiple-system operator account username.
162 ap_password: Multiple-system operator account password.
163 usenetrc: Use netrc for authentication instead.
164 verbose: Print additional info to stdout.
165 quiet: Do not print messages to stdout.
166 no_warnings: Do not print out anything for warnings.
167 forceurl: Force printing final URL.
168 forcetitle: Force printing title.
169 forceid: Force printing ID.
170 forcethumbnail: Force printing thumbnail URL.
171 forcedescription: Force printing description.
172 forcefilename: Force printing final filename.
173 forceduration: Force printing duration.
174 forcejson: Force printing info_dict as JSON.
175 dump_single_json: Force printing the info_dict of the whole playlist
176 (or video) as a single JSON line.
177 force_write_download_archive: Force writing download archive regardless of
178 'skip_download' or 'simulate'.
179 simulate: Do not download the video files.
180 format: Video format code. see "FORMAT SELECTION" for more details.
181 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
182 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
183 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
184 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
185 outtmpl: Template for output names.
186 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
187 restrictfilenames: Do not allow "&" and spaces in file names
188 trim_file_name: Limit length of filename (extension excluded)
189 ignoreerrors: Do not stop on download errors
190 (Default True when running youtube-dlc,
191 but False when directly accessing YoutubeDL class)
192 force_generic_extractor: Force downloader to use the generic extractor
193 overwrites: Overwrite all video and metadata files if True,
194 overwrite only non-video files if None
195 and don't overwrite any file if False
196 playliststart: Playlist item to start at.
197 playlistend: Playlist item to end at.
198 playlist_items: Specific indices of playlist to download.
199 playlistreverse: Download playlist items in reverse order.
200 playlistrandom: Download playlist items in random order.
201 matchtitle: Download only matching titles.
202 rejecttitle: Reject downloads for matching titles.
203 logger: Log messages to a logging.Logger instance.
204 logtostderr: Log messages to stderr instead of stdout.
205 writedescription: Write the video description to a .description file
206 writeinfojson: Write the video description to a .info.json file
207 writecomments: Extract video comments. This will not be written to disk
208 unless writeinfojson is also given
209 writeannotations: Write the video annotations to a .annotations.xml file
210 writethumbnail: Write the thumbnail image to a file
211 allow_playlist_files: Also write playlists' description, infojson etc in a separate file
212 write_all_thumbnails: Write all thumbnail formats to files
213 writelink: Write an internet shortcut file, depending on the
214 current platform (.url/.webloc/.desktop)
215 writeurllink: Write a Windows internet shortcut file (.url)
216 writewebloclink: Write a macOS internet shortcut file (.webloc)
217 writedesktoplink: Write a Linux internet shortcut file (.desktop)
218 writesubtitles: Write the video subtitles to a file
219 writeautomaticsub: Write the automatically generated subtitles to a file
220 allsubtitles: Downloads all the subtitles of the video
221 (requires writesubtitles or writeautomaticsub)
222 listsubtitles: Lists all available subtitles for the video
223 subtitlesformat: The format code for subtitles
224 subtitleslangs: List of languages of the subtitles to download
225 keepvideo: Keep the video file after post-processing
226 daterange: A DateRange object, download only if the upload_date is in the range.
227 skip_download: Skip the actual download of the video file
228 cachedir: Location of the cache files in the filesystem.
229 False to disable filesystem cache.
230 noplaylist: Download single video instead of a playlist if in doubt.
231 age_limit: An integer representing the user's age in years.
232 Unsuitable videos for the given age are skipped.
233 min_views: An integer representing the minimum view count the video
234 must have in order to not be skipped.
235 Videos without view count information are always
236 downloaded. None for no limit.
237 max_views: An integer representing the maximum view count.
238 Videos that are more popular than that are not
239 downloaded.
240 Videos without view count information are always
241 downloaded. None for no limit.
242 download_archive: File name of a file where all downloads are recorded.
243 Videos already present in the file are not downloaded
244 again.
245 break_on_existing: Stop the download process after attempting to download a
246 file that is in the archive.
247 break_on_reject: Stop the download process when encountering a video that
248 has been filtered out.
249 cookiefile: File name where cookies should be read from and dumped to
250 nocheckcertificate:Do not verify SSL certificates
251 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
252 At the moment, this is only supported by YouTube.
253 proxy: URL of the proxy server to use
254 geo_verification_proxy: URL of the proxy to use for IP address verification
255 on geo-restricted sites.
256 socket_timeout: Time to wait for unresponsive hosts, in seconds
257 bidi_workaround: Work around buggy terminals without bidirectional text
258 support, using fribidi
259 debug_printtraffic:Print out sent and received HTTP traffic
260 include_ads: Download ads as well
261 default_search: Prepend this string if an input url is not valid.
262 'auto' for elaborate guessing
263 encoding: Use this encoding instead of the system-specified.
264 extract_flat: Do not resolve URLs, return the immediate result.
265 Pass in 'in_playlist' to only show this behavior for
266 playlist items.
267 postprocessors: A list of dictionaries, each with an entry
268 * key: The name of the postprocessor. See
269 youtube_dlc/postprocessor/__init__.py for a list.
270 * _after_move: Optional. If True, run this post_processor
271 after 'MoveFilesAfterDownload'
272 as well as any further keyword arguments for the
273 postprocessor.
274 post_hooks: A list of functions that get called as the final step
275 for each video file, after all postprocessors have been
276 called. The filename will be passed as the only argument.
277 progress_hooks: A list of functions that get called on download
278 progress, with a dictionary with the entries
279 * status: One of "downloading", "error", or "finished".
280 Check this first and ignore unknown values.
281
282 If status is one of "downloading", or "finished", the
283 following properties may also be present:
284 * filename: The final filename (always present)
285 * tmpfilename: The filename we're currently writing to
286 * downloaded_bytes: Bytes on disk
287 * total_bytes: Size of the whole file, None if unknown
288 * total_bytes_estimate: Guess of the eventual file size,
289 None if unavailable.
290 * elapsed: The number of seconds since download started.
291 * eta: The estimated time in seconds, None if unknown
292 * speed: The download speed in bytes/second, None if
293 unknown
294 * fragment_index: The counter of the currently
295 downloaded video fragment.
296 * fragment_count: The number of fragments (= individual
297 files that will be merged)
298
299 Progress hooks are guaranteed to be called at least once
300 (with status "finished") if the download is successful.
301 merge_output_format: Extension to use when merging formats.
302 final_ext: Expected final extension; used to detect when the file was
303 already downloaded and converted. "merge_output_format" is
304 replaced by this extension when given
305 fixup: Automatically correct known faults of the file.
306 One of:
307 - "never": do nothing
308 - "warn": only emit a warning
309 - "detect_or_warn": check whether we can do anything
310 about it, warn otherwise (default)
311 source_address: Client-side IP address to bind to.
312 call_home: Boolean, true iff we are allowed to contact the
313 youtube-dlc servers for debugging.
314 sleep_interval: Number of seconds to sleep before each download when
315 used alone or a lower bound of a range for randomized
316 sleep before each download (minimum possible number
317 of seconds to sleep) when used along with
318 max_sleep_interval.
319 max_sleep_interval:Upper bound of a range for randomized sleep before each
320 download (maximum possible number of seconds to sleep).
321 Must only be used along with sleep_interval.
322 Actual sleep time will be a random float from range
323 [sleep_interval; max_sleep_interval].
324 listformats: Print an overview of available video formats and exit.
325 list_thumbnails: Print a table of all thumbnails and exit.
326 match_filter: A function that gets called with the info_dict of
327 every video.
328 If it returns a message, the video is ignored.
329 If it returns None, the video is downloaded.
330 match_filter_func in utils.py is one example for this.
331 no_color: Do not emit color codes in output.
332 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
333 HTTP header
334 geo_bypass_country:
335 Two-letter ISO 3166-2 country code that will be used for
336 explicit geographic restriction bypassing via faking
337 X-Forwarded-For HTTP header
338 geo_bypass_ip_block:
339 IP range in CIDR notation that will be used similarly to
340 geo_bypass_country
341
342 The following options determine which downloader is picked:
343 external_downloader: Executable of the external downloader to call.
344 None or unset for standard (built-in) downloader.
345 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
346 if True, otherwise use ffmpeg/avconv if False, otherwise
347 use downloader suggested by extractor if None.
348
349 The following parameters are not used by YoutubeDL itself, they are used by
350 the downloader (see youtube_dlc/downloader/common.py):
351 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
352 noresizebuffer, retries, continuedl, noprogress, consoletitle,
353 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
354 http_chunk_size.
355
356 The following options are used by the post processors:
357 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
358 otherwise prefer ffmpeg. (avconv support is deprecated)
359 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
360 to the binary or its containing directory.
361 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
362 and a list of additional command-line arguments for the
363 postprocessor/executable. The dict can also have "PP+EXE" keys
364 which are used when the given exe is used by the given PP.
365 Use 'default' as the name for arguments to passed to all PP
366 The following options are used by the Youtube extractor:
367 youtube_include_dash_manifest: If True (default), DASH manifests and related
368 data will be downloaded and processed by extractor.
369 You can reduce network I/O by disabling it if you don't
370 care about DASH.
371 """
372
    # Info-dict keys whose values are numeric; used when preparing the output
    # filename so that missing numeric fields can be patched to a string
    # presentation type instead of breaking %d-style substitution.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults; most are re-assigned per instance in __init__.
    # NOTE(review): the mutable ones (_ies, _pps, _playlist_urls) are shared
    # between instances until shadowed -- __init__ replaces _ies and _pps,
    # but _playlist_urls is not visibly re-assigned in this chunk; confirm
    # class-level sharing is intended.
    params = None                 # user options dict (set in __init__)
    _ies = []
    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
    __prepare_filename_warned = False
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
393
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
        self.__prepare_filename_warned = False
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # logtostderr routes screen output to stderr (index 1 of the pair)
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        """Preload the archive, if any is specified"""
        def preload_download_archive(self):
            # Read the download-archive file (one entry per line) into
            # self.archive. A missing file is not an error; other I/O
            # errors propagate.
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            # Emit a deprecation warning when `param` is set; returns True
            # if the deprecated option was present.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        # Deprecated cn_verification_proxy still honoured as a fallback
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        # final_ext (from --remux-video/--recode-video) overrides any
        # explicit merge_output_format
        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        # overwrites=None means "default behavior"; drop the key so
        # downstream code can distinguish unset from explicit False
        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv unavailable: fall back to fribidi
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors. 'key' names the PP class,
        # 'when' selects the stage ('beforedl'/'normal'/'aftermove'), and
        # all remaining entries are passed through as PP kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            if 'when' in pp_def:
                when = pp_def['when']
                del pp_def['when']
            else:
                when = 'normal'
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
526
527 def warn_if_short_id(self, argv):
528 # short YouTube ID starting with dash?
529 idxs = [
530 i for i, a in enumerate(argv)
531 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
532 if idxs:
533 correct_argv = (
534 ['youtube-dlc']
535 + [a for i, a in enumerate(argv) if i not in idxs]
536 + ['--'] + [argv[i] for i in idxs]
537 )
538 self.report_warning(
539 'Long argument string detected. '
540 'Use -- to separate parameters and URLs, like this:\n%s\n' %
541 args_to_str(correct_argv))
542
543 def add_info_extractor(self, ie):
544 """Add an InfoExtractor object to the end of the list."""
545 self._ies.append(ie)
546 if not isinstance(ie, type):
547 self._ies_instances[ie.ie_key()] = ie
548 ie.set_downloader(self)
549
550 def get_info_extractor(self, ie_key):
551 """
552 Get an instance of an IE with name ie_key, it will try to get one from
553 the _ies list, if there's no instance it will create a new one and add
554 it to the extractor list.
555 """
556 ie = self._ies_instances.get(ie_key)
557 if ie is None:
558 ie = get_info_extractor(ie_key)()
559 self.add_info_extractor(ie)
560 return ie
561
562 def add_default_info_extractors(self):
563 """
564 Add the InfoExtractors returned by gen_extractors to the end of the list
565 """
566 for ie in gen_extractor_classes():
567 self.add_info_extractor(ie)
568
569 def add_post_processor(self, pp, when='normal'):
570 """Add a PostProcessor object to the end of the chain."""
571 self._pps[when].append(pp)
572 pp.set_downloader(self)
573
574 def add_post_hook(self, ph):
575 """Add the post hook"""
576 self._post_hooks.append(ph)
577
578 def add_progress_hook(self, ph):
579 """Add the progress hook (currently only for the file downloader)"""
580 self._progress_hooks.append(ph)
581
    def _bidi_workaround(self, message):
        """Reorder bidirectional text in *message* via the fribidi/bidiv
        subprocess started in __init__ (bidi_workaround option).

        Returns *message* unchanged when the workaround is not active.
        """
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        # Write the message (newline-terminated), then read back exactly
        # as many lines as were sent through the subprocess
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Strip the trailing newline we appended above
        return res[:-len('\n')]
594
595 def to_screen(self, message, skip_eol=False):
596 """Print message to stdout if not in quiet mode."""
597 return self.to_stdout(message, skip_eol, check_quiet=True)
598
599 def _write_string(self, s, out=None):
600 write_string(s, out=out, encoding=self.params.get('encoding'))
601
602 def to_stdout(self, message, skip_eol=False, check_quiet=False):
603 """Print message to stdout if not in quiet mode."""
604 if self.params.get('logger'):
605 self.params['logger'].debug(message)
606 elif not check_quiet or not self.params.get('quiet', False):
607 message = self._bidi_workaround(message)
608 terminator = ['\n', ''][skip_eol]
609 output = message + terminator
610
611 self._write_string(output, self._screen_file)
612
613 def to_stderr(self, message):
614 """Print message to stderr."""
615 assert isinstance(message, compat_str)
616 if self.params.get('logger'):
617 self.params['logger'].error(message)
618 else:
619 message = self._bidi_workaround(message)
620 output = message + '\n'
621 self._write_string(output, self._err_file)
622
    def to_console_title(self, message):
        """Set the console/terminal window title to *message*
        (only when the consoletitle option is enabled)."""
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape: set icon name and window title
            self._write_string('\033]0;%s\007' % message, self._screen_file)
633
634 def save_console_title(self):
635 if not self.params.get('consoletitle', False):
636 return
637 if self.params.get('simulate', False):
638 return
639 if compat_os_name != 'nt' and 'TERM' in os.environ:
640 # Save the title on stack
641 self._write_string('\033[22;0t', self._screen_file)
642
643 def restore_console_title(self):
644 if not self.params.get('consoletitle', False):
645 return
646 if self.params.get('simulate', False):
647 return
648 if compat_os_name != 'nt' and 'TERM' in os.environ:
649 # Restore the title from stack
650 self._write_string('\033[23;0t', self._screen_file)
651
    def __enter__(self):
        """Context-manager entry: save the console title and return self."""
        self.save_console_title()
        return self
655
656 def __exit__(self, *args):
657 self.restore_console_title()
658
659 if self.params.get('cookiefile') is not None:
660 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
661
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Wrapped exceptions (e.g. ExtractorError) may carry the
                    # original exc_info of the root cause; include it first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info so DownloadError
            # points at the root cause rather than the wrapper
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record failure and continue
        self._download_retcode = 1
691
692 def report_warning(self, message):
693 '''
694 Print the message to stderr, it will be prefixed with 'WARNING:'
695 If stderr is a tty file the 'WARNING:' will be colored
696 '''
697 if self.params.get('logger') is not None:
698 self.params['logger'].warning(message)
699 else:
700 if self.params.get('no_warnings'):
701 return
702 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
703 _msg_header = '\033[0;33mWARNING:\033[0m'
704 else:
705 _msg_header = 'WARNING:'
706 warning_message = '%s %s' % (_msg_header, message)
707 self.to_stderr(warning_message)
708
709 def report_error(self, message, tb=None):
710 '''
711 Do the same as trouble, but prefixes the message with 'ERROR:', colored
712 in red if stderr is a tty file.
713 '''
714 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
715 _msg_header = '\033[0;31mERROR:\033[0m'
716 else:
717 _msg_header = 'ERROR:'
718 error_message = '%s %s' % (_msg_header, message)
719 self.trouble(error_message, tb)
720
721 def report_file_already_downloaded(self, file_name):
722 """Report file has already been fully downloaded."""
723 try:
724 self.to_screen('[download] %s has already been downloaded' % file_name)
725 except UnicodeEncodeError:
726 self.to_screen('[download] The file has already been downloaded')
727
728 def report_file_delete(self, file_name):
729 """Report that existing file will be deleted."""
730 try:
731 self.to_screen('Deleting already existent file %s' % file_name)
732 except UnicodeEncodeError:
733 self.to_screen('Deleting already existent file')
734
735 def prepare_filename(self, info_dict, warn=False):
736 """Generate the output filename."""
737 try:
738 template_dict = dict(info_dict)
739
740 template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
741 formatSeconds(info_dict['duration'], '-')
742 if info_dict.get('duration', None) is not None
743 else None)
744
745 template_dict['epoch'] = int(time.time())
746 autonumber_size = self.params.get('autonumber_size')
747 if autonumber_size is None:
748 autonumber_size = 5
749 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
750 if template_dict.get('resolution') is None:
751 if template_dict.get('width') and template_dict.get('height'):
752 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
753 elif template_dict.get('height'):
754 template_dict['resolution'] = '%sp' % template_dict['height']
755 elif template_dict.get('width'):
756 template_dict['resolution'] = '%dx?' % template_dict['width']
757
758 sanitize = lambda k, v: sanitize_filename(
759 compat_str(v),
760 restricted=self.params.get('restrictfilenames'),
761 is_id=(k == 'id' or k.endswith('_id')))
762 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
763 for k, v in template_dict.items()
764 if v is not None and not isinstance(v, (list, tuple, dict)))
765 na = self.params.get('outtmpl_na_placeholder', 'NA')
766 template_dict = collections.defaultdict(lambda: na, template_dict)
767
768 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
769
770 # For fields playlist_index and autonumber convert all occurrences
771 # of %(field)s to %(field)0Nd for backward compatibility
772 field_size_compat_map = {
773 'playlist_index': len(str(template_dict['n_entries'])),
774 'autonumber': autonumber_size,
775 }
776 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
777 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
778 if mobj:
779 outtmpl = re.sub(
780 FIELD_SIZE_COMPAT_RE,
781 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
782 outtmpl)
783
784 # As of [1] format syntax is:
785 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
786 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
787 FORMAT_RE = r'''(?x)
788 (?<!%)
789 %
790 \({0}\) # mapping key
791 (?:[#0\-+ ]+)? # conversion flags (optional)
792 (?:\d+)? # minimum field width (optional)
793 (?:\.\d+)? # precision (optional)
794 [hlL]? # length modifier (optional)
795 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
796 '''
797
798 numeric_fields = list(self._NUMERIC_FIELDS)
799
800 # Format date
801 FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
802 for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
803 conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
804 if key in template_dict:
805 continue
806 value = strftime_or_none(template_dict.get(field), frmt, na)
807 if conv_type in 'crs': # string
808 value = sanitize(field, value)
809 else: # number
810 numeric_fields.append(key)
811 value = float_or_none(value, default=None)
812 if value is not None:
813 template_dict[key] = value
814
815 # Missing numeric fields used together with integer presentation types
816 # in format specification will break the argument substitution since
817 # string NA placeholder is returned for missing fields. We will patch
818 # output template for missing fields to meet string presentation type.
819 for numeric_field in numeric_fields:
820 if numeric_field not in template_dict:
821 outtmpl = re.sub(
822 FORMAT_RE.format(re.escape(numeric_field)),
823 r'%({0})s'.format(numeric_field), outtmpl)
824
825 # expand_path translates '%%' into '%' and '$$' into '$'
826 # correspondingly that is not what we want since we need to keep
827 # '%%' intact for template dict substitution step. Working around
828 # with boundary-alike separator hack.
829 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
830 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
831
832 # outtmpl should be expand_path'ed before template dict substitution
833 # because meta fields may contain env variables we don't want to
834 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
835 # title "Hello $PATH", we don't want `$PATH` to be expanded.
836 filename = expand_path(outtmpl).replace(sep, '') % template_dict
837
838 # https://github.com/blackjack4494/youtube-dlc/issues/85
839 trim_file_name = self.params.get('trim_file_name', False)
840 if trim_file_name:
841 fn_groups = filename.rsplit('.')
842 ext = fn_groups[-1]
843 sub_ext = ''
844 if len(fn_groups) > 2:
845 sub_ext = fn_groups[-2]
846 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
847
848 # Temporary fix for #4787
849 # 'Treat' all problem characters by passing filename through preferredencoding
850 # to workaround encoding issues with subprocess on python2 @ Windows
851 if sys.version_info < (3, 0) and sys.platform == 'win32':
852 filename = encodeFilename(filename, True).decode(preferredencoding())
853 filename = sanitize_path(filename)
854
855 if warn and not self.__prepare_filename_warned:
856 if not self.params.get('paths'):
857 pass
858 elif filename == '-':
859 self.report_warning('--paths is ignored when an outputting to stdout')
860 elif os.path.isabs(filename):
861 self.report_warning('--paths is ignored since an absolute path is given in output template')
862 self.__prepare_filename_warned = True
863
864 return filename
865 except ValueError as err:
866 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
867 return None
868
869 def prepare_filepath(self, filename, dir_type=''):
870 if filename == '-':
871 return filename
872 paths = self.params.get('paths', {})
873 assert isinstance(paths, dict)
874 homepath = expand_path(paths.get('home', '').strip())
875 assert isinstance(homepath, compat_str)
876 subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
877 assert isinstance(subdir, compat_str)
878 return sanitize_path(os.path.join(homepath, subdir, filename))
879
880 def _match_entry(self, info_dict, incomplete):
881 """ Returns None if the file should be downloaded """
882
883 def check_filter():
884 video_title = info_dict.get('title', info_dict.get('id', 'video'))
885 if 'title' in info_dict:
886 # This can happen when we're just evaluating the playlist
887 title = info_dict['title']
888 matchtitle = self.params.get('matchtitle', False)
889 if matchtitle:
890 if not re.search(matchtitle, title, re.IGNORECASE):
891 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
892 rejecttitle = self.params.get('rejecttitle', False)
893 if rejecttitle:
894 if re.search(rejecttitle, title, re.IGNORECASE):
895 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
896 date = info_dict.get('upload_date')
897 if date is not None:
898 dateRange = self.params.get('daterange', DateRange())
899 if date not in dateRange:
900 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
901 view_count = info_dict.get('view_count')
902 if view_count is not None:
903 min_views = self.params.get('min_views')
904 if min_views is not None and view_count < min_views:
905 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
906 max_views = self.params.get('max_views')
907 if max_views is not None and view_count > max_views:
908 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
909 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
910 return 'Skipping "%s" because it is age restricted' % video_title
911 if self.in_download_archive(info_dict):
912 return '%s has already been recorded in archive' % video_title
913
914 if not incomplete:
915 match_filter = self.params.get('match_filter')
916 if match_filter is not None:
917 ret = match_filter(info_dict)
918 if ret is not None:
919 return ret
920 return None
921
922 reason = check_filter()
923 if reason is not None:
924 self.to_screen('[download] ' + reason)
925 if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
926 raise ExistingVideoReached()
927 elif self.params.get('break_on_reject', False):
928 raise RejectedVideoReached()
929 return reason
930
931 @staticmethod
932 def add_extra_info(info_dict, extra_info):
933 '''Set the keys from extra_info in info dict if they are missing'''
934 for key, value in extra_info.items():
935 info_dict.setdefault(key, value)
936
937 def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
938 process=True, force_generic_extractor=False):
939 '''
940 Returns a list with a dictionary for each video we find.
941 If 'download', also downloads the videos.
942 extra_info is a dict containing the extra values to add to each result
943 '''
944
945 if not ie_key and force_generic_extractor:
946 ie_key = 'Generic'
947
948 if ie_key:
949 ies = [self.get_info_extractor(ie_key)]
950 else:
951 ies = self._ies
952
953 for ie in ies:
954 if not ie.suitable(url):
955 continue
956
957 ie_key = ie.ie_key()
958 ie = self.get_info_extractor(ie_key)
959 if not ie.working():
960 self.report_warning('The program functionality for this site has been marked as broken, '
961 'and will probably not work.')
962
963 try:
964 temp_id = str_or_none(
965 ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
966 else ie._match_id(url))
967 except (AssertionError, IndexError, AttributeError):
968 temp_id = None
969 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
970 self.to_screen("[%s] %s: has already been recorded in archive" % (
971 ie_key, temp_id))
972 break
973 return self.__extract_info(url, ie, download, extra_info, process, info_dict)
974 else:
975 self.report_error('no suitable InfoExtractor for URL %s' % url)
976
    def __handle_extraction_exceptions(func):
        # Decorator for extraction entry points. NOTE: defined in the class
        # body, so there is no `self` here -- `func` is the method being wrapped.
        # "Expected" failures are turned into error reports; control-flow
        # signals propagate; everything else re-raises unless --ignore-errors.
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # Not errors: these signal the caller to stop processing
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
998
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
        # Run the chosen extractor on the URL and normalize/resolve its result.
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if info_dict:
            # Preserve id/title supplied by the caller (e.g. from a playlist
            # entry) so they survive re-extraction
            if info_dict.get('id'):
                ie_result['id'] = info_dict['id']
            if info_dict.get('title'):
                ie_result['title'] = info_dict['title']
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1020
1021 def add_default_extra_info(self, ie_result, ie, url):
1022 self.add_extra_info(ie_result, {
1023 'extractor': ie.IE_NAME,
1024 'webpage_url': url,
1025 'webpage_url_basename': url_basename(url),
1026 'extractor_key': ie.ie_key(),
1027 })
1028
1029 def process_ie_result(self, ie_result, download=True, extra_info={}):
1030 """
1031 Take the result of the ie(may be modified) and resolve all unresolved
1032 references (URLs, playlist items).
1033
1034 It will also download the videos if 'download'.
1035 Returns the resolved ie_result.
1036 """
1037 result_type = ie_result.get('_type', 'video')
1038
1039 if result_type in ('url', 'url_transparent'):
1040 ie_result['url'] = sanitize_url(ie_result['url'])
1041 extract_flat = self.params.get('extract_flat', False)
1042 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1043 or extract_flat is True):
1044 self.__forced_printings(
1045 ie_result,
1046 self.prepare_filepath(self.prepare_filename(ie_result)),
1047 incomplete=True)
1048 return ie_result
1049
1050 if result_type == 'video':
1051 self.add_extra_info(ie_result, extra_info)
1052 return self.process_video_result(ie_result, download=download)
1053 elif result_type == 'url':
1054 # We have to add extra_info to the results because it may be
1055 # contained in a playlist
1056 return self.extract_info(ie_result['url'],
1057 download, info_dict=ie_result,
1058 ie_key=ie_result.get('ie_key'),
1059 extra_info=extra_info)
1060 elif result_type == 'url_transparent':
1061 # Use the information from the embedding page
1062 info = self.extract_info(
1063 ie_result['url'], ie_key=ie_result.get('ie_key'),
1064 extra_info=extra_info, download=False, process=False)
1065
1066 # extract_info may return None when ignoreerrors is enabled and
1067 # extraction failed with an error, don't crash and return early
1068 # in this case
1069 if not info:
1070 return info
1071
1072 force_properties = dict(
1073 (k, v) for k, v in ie_result.items() if v is not None)
1074 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1075 if f in force_properties:
1076 del force_properties[f]
1077 new_result = info.copy()
1078 new_result.update(force_properties)
1079
1080 # Extracted info may not be a video result (i.e.
1081 # info.get('_type', 'video') != video) but rather an url or
1082 # url_transparent. In such cases outer metadata (from ie_result)
1083 # should be propagated to inner one (info). For this to happen
1084 # _type of info should be overridden with url_transparent. This
1085 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1086 if new_result.get('_type') == 'url':
1087 new_result['_type'] = 'url_transparent'
1088
1089 return self.process_ie_result(
1090 new_result, download=download, extra_info=extra_info)
1091 elif result_type in ('playlist', 'multi_video'):
1092 # Protect from infinite recursion due to recursively nested playlists
1093 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1094 webpage_url = ie_result['webpage_url']
1095 if webpage_url in self._playlist_urls:
1096 self.to_screen(
1097 '[download] Skipping already downloaded playlist: %s'
1098 % ie_result.get('title') or ie_result.get('id'))
1099 return
1100
1101 self._playlist_level += 1
1102 self._playlist_urls.add(webpage_url)
1103 try:
1104 return self.__process_playlist(ie_result, download)
1105 finally:
1106 self._playlist_level -= 1
1107 if not self._playlist_level:
1108 self._playlist_urls.clear()
1109 elif result_type == 'compat_list':
1110 self.report_warning(
1111 'Extractor %s returned a compat_list result. '
1112 'It needs to be updated.' % ie_result.get('extractor'))
1113
1114 def _fixup(r):
1115 self.add_extra_info(
1116 r,
1117 {
1118 'extractor': ie_result['extractor'],
1119 'webpage_url': ie_result['webpage_url'],
1120 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1121 'extractor_key': ie_result['extractor_key'],
1122 }
1123 )
1124 return r
1125 ie_result['entries'] = [
1126 self.process_ie_result(_fixup(r), download, extra_info)
1127 for r in ie_result['entries']
1128 ]
1129 return ie_result
1130 else:
1131 raise Exception('Invalid result type: %s' % result_type)
1132
    def __process_playlist(self, ie_result, download):
        # Expand a playlist result: optionally write playlist-level metadata
        # files, select the requested slice of entries, then extract/download
        # each entry with playlist context attached.
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if self.params.get('allow_playlist_files', True):
            # Copy used only for filename preparation; playlist_index 0
            # is a placeholder so the output template can be filled in
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0
            }
            ie_copy.update(dict(ie_result))

            def ensure_dir_exists(path):
                return make_dir(path, self.report_error)

            if self.params.get('writeinfojson', False):
                infofn = replace_extension(
                    self.prepare_filepath(self.prepare_filename(ie_copy), 'infojson'),
                    'info.json', ie_result.get('ext'))
                if not ensure_dir_exists(encodeFilename(infofn)):
                    return
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                    self.to_screen('[info] Playlist metadata is already present')
                else:
                    playlist_info = dict(ie_result)
                    # Entries is a generator which should not be resolved here
                    del playlist_info['entries']
                    self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                    try:
                        write_json_file(self.filter_requested_info(playlist_info), infofn)
                    except (OSError, IOError):
                        self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

            if self.params.get('writedescription', False):
                descfn = replace_extension(
                    self.prepare_filepath(self.prepare_filename(ie_copy), 'description'),
                    'description', ie_result.get('ext'))
                if not ensure_dir_exists(encodeFilename(descfn)):
                    return
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                    self.to_screen('[info] Playlist description is already present')
                elif ie_result.get('description') is None:
                    self.report_warning('There\'s no playlist description to write.')
                else:
                    try:
                        self.to_screen('[info] Writing playlist description to: ' + descfn)
                        with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                            descfile.write(ie_result['description'])
                    except (OSError, IOError):
                        self.report_error('Cannot write playlist description file ' + descfn)
                        return

        playlist_results = []

        # --playlist-start is 1-based on the command line, 0-based here
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Parse '--playlist-items 1-3,7' into an ordered set of 1-based indices
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Keep only the requested 1-based indices (negative indices wrap)
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Entries may arrive as a plain list, a lazily-paged PagedList, or
        # any other iterable/generator; slice accordingly.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist context propagated into every entry's info dict
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1292
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Thin wrapper so that a failing playlist entry is reported by
        # __handle_extraction_exceptions instead of aborting the whole playlist
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1297
1298 def _build_format_filter(self, filter_spec):
1299 " Returns a function to filter the formats according to the filter_spec "
1300
1301 OPERATORS = {
1302 '<': operator.lt,
1303 '<=': operator.le,
1304 '>': operator.gt,
1305 '>=': operator.ge,
1306 '=': operator.eq,
1307 '!=': operator.ne,
1308 }
1309 operator_rex = re.compile(r'''(?x)\s*
1310 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1311 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1312 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1313 $
1314 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1315 m = operator_rex.search(filter_spec)
1316 if m:
1317 try:
1318 comparison_value = int(m.group('value'))
1319 except ValueError:
1320 comparison_value = parse_filesize(m.group('value'))
1321 if comparison_value is None:
1322 comparison_value = parse_filesize(m.group('value') + 'B')
1323 if comparison_value is None:
1324 raise ValueError(
1325 'Invalid value %r in format specification %r' % (
1326 m.group('value'), filter_spec))
1327 op = OPERATORS[m.group('op')]
1328
1329 if not m:
1330 STR_OPERATORS = {
1331 '=': operator.eq,
1332 '^=': lambda attr, value: attr.startswith(value),
1333 '$=': lambda attr, value: attr.endswith(value),
1334 '*=': lambda attr, value: value in attr,
1335 }
1336 str_operator_rex = re.compile(r'''(?x)
1337 \s*(?P<key>[a-zA-Z0-9._-]+)
1338 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1339 \s*(?P<value>[a-zA-Z0-9._-]+)
1340 \s*$
1341 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1342 m = str_operator_rex.search(filter_spec)
1343 if m:
1344 comparison_value = m.group('value')
1345 str_op = STR_OPERATORS[m.group('op')]
1346 if m.group('negation'):
1347 op = lambda attr, value: not str_op(attr, value)
1348 else:
1349 op = str_op
1350
1351 if not m:
1352 raise ValueError('Invalid filter specification %r' % filter_spec)
1353
1354 def _filter(f):
1355 actual_value = f.get(m.group('key'))
1356 if actual_value is None:
1357 return m.group('none_inclusive')
1358 return op(actual_value, comparison_value)
1359 return _filter
1360
1361 def _default_format_spec(self, info_dict, download=True):
1362
1363 def can_merge():
1364 merger = FFmpegMergerPP(self)
1365 return merger.available and merger.can_merge()
1366
1367 prefer_best = (
1368 not self.params.get('simulate', False)
1369 and download
1370 and (
1371 not can_merge()
1372 or info_dict.get('is_live', False)
1373 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1374
1375 return (
1376 'best/bestvideo+bestaudio'
1377 if prefer_best
1378 else 'bestvideo*+bestaudio/best'
1379 if not self.params.get('allow_multiple_audio_streams', False)
1380 else 'bestvideo+bestaudio/best')
1381
    def build_format_selector(self, format_spec):
        """Compile a --format spec into a selector function.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree
        of FormatSelector nodes (SINGLE atoms, '+' MERGE, '/' PICKFIRST,
        ',' lists, '(...)' GROUP, '[...]' filters), then compiled into a
        function mapping a ctx dict ({'formats', 'incomplete_formats', ...})
        to the selected format dicts.
        """
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            # Consume tokens up to the matching ']' and return the raw filter text
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser; the inside_* flags mark which
            # construct we are currently nested in (to know when to stop)
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a parsed selector (or list of alternatives) into a
            # generator function over ctx['formats']
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector if selector.selector is not None else 'best'

                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if formats:
                            for f in formats:
                                yield f

                else:
                    # best/worst with optional video/audio qualifier and '*' modifier
                    format_fallback = False
                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                    if format_spec_obj is not None:
                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                        not_format_type = 'v' if format_type == 'a' else 'a'
                        format_modified = format_spec_obj.group(3) is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                    if format_type  # bv, ba, wv, wa
                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                    if not format_modified  # b, w
                                    else None)  # b*, w*
                    else:
                        # Otherwise the atom is an extension or a format_id
                        format_idx = -1
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if matches:
                            yield matches[format_idx]
                        elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            yield formats[format_idx]

            elif selector.type == MERGE:  # +
                def _merge(formats_pair):
                    # Combine a video and an audio format into one
                    # 'requested_formats' result for the merger postprocessor
                    format_1, format_2 = formats_pair

                    formats_info = []
                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
                    formats_info.extend(format_2.get('requested_formats', (format_2,)))

                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        get_no_more = {"video": False, "audio": False}
                        for (i, fmt_info) in enumerate(formats_info):
                            for aud_vid in ["audio", "video"]:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        formats_info.pop(i)
                                    get_no_more[aud_vid] = True

                    if len(formats_info) == 1:
                        return formats_info[0]

                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                    output_ext = self.params.get('merge_output_format')
                    if not output_ext:
                        if the_only_video:
                            output_ext = the_only_video['ext']
                        elif the_only_audio and not video_fmts:
                            output_ext = the_only_audio['ext']
                        else:
                            output_ext = 'mkv'

                    new_dict = {
                        'requested_formats': formats_info,
                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                        'ext': output_ext,
                    }

                    if the_only_video:
                        new_dict.update({
                            'width': the_only_video.get('width'),
                            'height': the_only_video.get('height'),
                            'resolution': the_only_video.get('resolution'),
                            'fps': the_only_video.get('fps'),
                            'vcodec': the_only_video.get('vcodec'),
                            'vbr': the_only_video.get('vbr'),
                            'stretched_ratio': the_only_video.get('stretched_ratio'),
                        })

                    if the_only_audio:
                        new_dict.update({
                            'acodec': the_only_audio.get('acodec'),
                            'abr': the_only_audio.get('abr'),
                        })

                    return new_dict

                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            # Wrap the selector with the '[...]' filters attached to it
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-token pushback (restore_last_token),
            # which the parser needs to end nested constructs
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1669
1670 def _calc_headers(self, info_dict):
1671 res = std_headers.copy()
1672
1673 add_headers = info_dict.get('http_headers')
1674 if add_headers:
1675 res.update(add_headers)
1676
1677 cookies = self._calc_cookies(info_dict)
1678 if cookies:
1679 res['Cookie'] = cookies
1680
1681 if 'X-Forwarded-For' not in res:
1682 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1683 if x_forwarded_for_ip:
1684 res['X-Forwarded-For'] = x_forwarded_for_ip
1685
1686 return res
1687
1688 def _calc_cookies(self, info_dict):
1689 pr = sanitized_Request(info_dict['url'])
1690 self.cookiejar.add_cookie_header(pr)
1691 return pr.get_header('Cookie')
1692
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result and select its formats.

        Normalizes string/numeric fields, thumbnails, subtitles and the
        formats list, runs the format selector, and (when download is True)
        hands each selected format to process_info().  Returns the
        (mutated) info_dict, or None for the pure listing modes
        (--list-thumbnails, --list-subs, --list-formats).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn once per mistyped field; a wrong type here is an extractor bug
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str in place (None is left alone)
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int in place
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: promote a single 'thumbnail' into the list,
        # sort worst-to-best, and fill in url/resolution/id for each entry
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # thumbnails are sorted worst-to-best, so [-1] is the best one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        def is_wellformed(f):
            # A format without a url cannot be downloaded; drop it with a warning
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self.to_screen('[debug] Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1930
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format"""
        available_subs = {}
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                # Automatic captions never override real subtitles for a language
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        if self.params.get('allsubtitles', False):
            requested_langs = available_subs.keys()
        else:
            if self.params.get('subtitleslangs', False):
                requested_langs = self.params.get('subtitleslangs')
            elif 'en' in available_subs:
                # Default to English when available
                requested_langs = ['en']
            else:
                # Otherwise fall back to an arbitrary available language
                requested_langs = [list(available_subs.keys())[0]]

        # --sub-format accepts a '/'-separated preference list, e.g. 'srt/best'
        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                # for/else: no preferred extension matched - use the last format
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
1979
1980 def __forced_printings(self, info_dict, filename, incomplete):
1981 def print_mandatory(field):
1982 if (self.params.get('force%s' % field, False)
1983 and (not incomplete or info_dict.get(field) is not None)):
1984 self.to_stdout(info_dict[field])
1985
1986 def print_optional(field):
1987 if (self.params.get('force%s' % field, False)
1988 and info_dict.get(field) is not None):
1989 self.to_stdout(info_dict[field])
1990
1991 print_mandatory('title')
1992 print_mandatory('id')
1993 if self.params.get('forceurl', False) and not incomplete:
1994 if info_dict.get('requested_formats') is not None:
1995 for f in info_dict['requested_formats']:
1996 self.to_stdout(f['url'] + f.get('play_path', ''))
1997 else:
1998 # For RTMP URLs, also include the playpath
1999 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2000 print_optional('thumbnail')
2001 print_optional('description')
2002 if self.params.get('forcefilename', False) and filename is not None:
2003 self.to_stdout(filename)
2004 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2005 self.to_stdout(formatSeconds(info_dict['duration']))
2006 print_mandatory('format')
2007 if self.params.get('forcejson', False):
2008 self.to_stdout(json.dumps(info_dict))
2009
2010 def process_info(self, info_dict):
2011 """Process a single resolved IE result."""
2012
2013 assert info_dict.get('_type', 'video') == 'video'
2014
2015 info_dict.setdefault('__postprocessors', [])
2016
2017 max_downloads = self.params.get('max_downloads')
2018 if max_downloads is not None:
2019 if self._num_downloads >= int(max_downloads):
2020 raise MaxDownloadsReached()
2021
2022 # TODO: backward compatibility, to be removed
2023 info_dict['fulltitle'] = info_dict['title']
2024
2025 if 'format' not in info_dict:
2026 info_dict['format'] = info_dict['ext']
2027
2028 if self._match_entry(info_dict, incomplete=False) is not None:
2029 return
2030
2031 self._num_downloads += 1
2032
2033 info_dict = self.pre_process(info_dict)
2034
2035 filename = self.prepare_filename(info_dict, warn=True)
2036 info_dict['_filename'] = full_filename = self.prepare_filepath(filename)
2037 temp_filename = self.prepare_filepath(filename, 'temp')
2038 files_to_move = {}
2039
2040 # Forced printings
2041 self.__forced_printings(info_dict, full_filename, incomplete=False)
2042
2043 if self.params.get('simulate', False):
2044 if self.params.get('force_write_download_archive', False):
2045 self.record_download_archive(info_dict)
2046
2047 # Do nothing else if in simulate mode
2048 return
2049
2050 if filename is None:
2051 return
2052
2053 def ensure_dir_exists(path):
2054 return make_dir(path, self.report_error)
2055
2056 if not ensure_dir_exists(encodeFilename(full_filename)):
2057 return
2058 if not ensure_dir_exists(encodeFilename(temp_filename)):
2059 return
2060
2061 if self.params.get('writedescription', False):
2062 descfn = replace_extension(
2063 self.prepare_filepath(filename, 'description'),
2064 'description', info_dict.get('ext'))
2065 if not ensure_dir_exists(encodeFilename(descfn)):
2066 return
2067 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2068 self.to_screen('[info] Video description is already present')
2069 elif info_dict.get('description') is None:
2070 self.report_warning('There\'s no description to write.')
2071 else:
2072 try:
2073 self.to_screen('[info] Writing video description to: ' + descfn)
2074 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2075 descfile.write(info_dict['description'])
2076 except (OSError, IOError):
2077 self.report_error('Cannot write description file ' + descfn)
2078 return
2079
2080 if self.params.get('writeannotations', False):
2081 annofn = replace_extension(
2082 self.prepare_filepath(filename, 'annotation'),
2083 'annotations.xml', info_dict.get('ext'))
2084 if not ensure_dir_exists(encodeFilename(annofn)):
2085 return
2086 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2087 self.to_screen('[info] Video annotations are already present')
2088 elif not info_dict.get('annotations'):
2089 self.report_warning('There are no annotations to write.')
2090 else:
2091 try:
2092 self.to_screen('[info] Writing video annotations to: ' + annofn)
2093 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2094 annofile.write(info_dict['annotations'])
2095 except (KeyError, TypeError):
2096 self.report_warning('There are no annotations to write.')
2097 except (OSError, IOError):
2098 self.report_error('Cannot write annotations file: ' + annofn)
2099 return
2100
2101 def dl(name, info, subtitle=False):
2102 fd = get_suitable_downloader(info, self.params)(self, self.params)
2103 for ph in self._progress_hooks:
2104 fd.add_progress_hook(ph)
2105 if self.params.get('verbose'):
2106 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2107 return fd.download(name, info, subtitle)
2108
2109 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2110 self.params.get('writeautomaticsub')])
2111
2112 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2113 # subtitles download errors are already managed as troubles in relevant IE
2114 # that way it will silently go on when used with unsupporting IE
2115 subtitles = info_dict['requested_subtitles']
2116 # ie = self.get_info_extractor(info_dict['extractor_key'])
2117 for sub_lang, sub_info in subtitles.items():
2118 sub_format = sub_info['ext']
2119 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2120 sub_filename_final = subtitles_filename(
2121 self.prepare_filepath(filename, 'subtitle'),
2122 sub_lang, sub_format, info_dict.get('ext'))
2123 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2124 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2125 files_to_move[sub_filename] = sub_filename_final
2126 else:
2127 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2128 if sub_info.get('data') is not None:
2129 try:
2130 # Use newline='' to prevent conversion of newline characters
2131 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2132 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2133 subfile.write(sub_info['data'])
2134 files_to_move[sub_filename] = sub_filename_final
2135 except (OSError, IOError):
2136 self.report_error('Cannot write subtitles file ' + sub_filename)
2137 return
2138 else:
2139 try:
2140 dl(sub_filename, sub_info, subtitle=True)
2141 '''
2142 if self.params.get('sleep_interval_subtitles', False):
2143 dl(sub_filename, sub_info)
2144 else:
2145 sub_data = ie._request_webpage(
2146 sub_info['url'], info_dict['id'], note=False).read()
2147 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2148 subfile.write(sub_data)
2149 '''
2150 files_to_move[sub_filename] = sub_filename_final
2151 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2152 self.report_warning('Unable to download subtitle for "%s": %s' %
2153 (sub_lang, error_to_compat_str(err)))
2154 continue
2155
2156 if self.params.get('skip_download', False):
2157 if self.params.get('convertsubtitles', False):
2158 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2159 filename_real_ext = os.path.splitext(filename)[1][1:]
2160 filename_wo_ext = (
2161 os.path.splitext(full_filename)[0]
2162 if filename_real_ext == info_dict['ext']
2163 else full_filename)
2164 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2165 # if subconv.available:
2166 # info_dict['__postprocessors'].append(subconv)
2167 if os.path.exists(encodeFilename(afilename)):
2168 self.to_screen(
2169 '[download] %s has already been downloaded and '
2170 'converted' % afilename)
2171 else:
2172 try:
2173 self.post_process(full_filename, info_dict, files_to_move)
2174 except PostProcessingError as err:
2175 self.report_error('Postprocessing: %s' % str(err))
2176 return
2177
2178 if self.params.get('writeinfojson', False):
2179 infofn = replace_extension(
2180 self.prepare_filepath(filename, 'infojson'),
2181 'info.json', info_dict.get('ext'))
2182 if not ensure_dir_exists(encodeFilename(infofn)):
2183 return
2184 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2185 self.to_screen('[info] Video metadata is already present')
2186 else:
2187 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2188 try:
2189 write_json_file(self.filter_requested_info(info_dict), infofn)
2190 except (OSError, IOError):
2191 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2192 return
2193 info_dict['__infojson_filepath'] = infofn
2194
2195 thumbdir = os.path.dirname(self.prepare_filepath(filename, 'thumbnail'))
2196 for thumbfn in self._write_thumbnails(info_dict, temp_filename):
2197 files_to_move[thumbfn] = os.path.join(thumbdir, os.path.basename(thumbfn))
2198
2199 # Write internet shortcut files
2200 url_link = webloc_link = desktop_link = False
2201 if self.params.get('writelink', False):
2202 if sys.platform == "darwin": # macOS.
2203 webloc_link = True
2204 elif sys.platform.startswith("linux"):
2205 desktop_link = True
2206 else: # if sys.platform in ['win32', 'cygwin']:
2207 url_link = True
2208 if self.params.get('writeurllink', False):
2209 url_link = True
2210 if self.params.get('writewebloclink', False):
2211 webloc_link = True
2212 if self.params.get('writedesktoplink', False):
2213 desktop_link = True
2214
2215 if url_link or webloc_link or desktop_link:
2216 if 'webpage_url' not in info_dict:
2217 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2218 return
2219 ascii_url = iri_to_uri(info_dict['webpage_url'])
2220
2221 def _write_link_file(extension, template, newline, embed_filename):
2222 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2223 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2224 self.to_screen('[info] Internet shortcut is already present')
2225 else:
2226 try:
2227 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2228 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2229 template_vars = {'url': ascii_url}
2230 if embed_filename:
2231 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2232 linkfile.write(template % template_vars)
2233 except (OSError, IOError):
2234 self.report_error('Cannot write internet shortcut ' + linkfn)
2235 return False
2236 return True
2237
2238 if url_link:
2239 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2240 return
2241 if webloc_link:
2242 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2243 return
2244 if desktop_link:
2245 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2246 return
2247
2248 # Download
2249 must_record_download_archive = False
2250 if not self.params.get('skip_download', False):
2251 try:
2252
2253 def existing_file(*filepaths):
2254 ext = info_dict.get('ext')
2255 final_ext = self.params.get('final_ext', ext)
2256 existing_files = []
2257 for file in orderedSet(filepaths):
2258 if final_ext != ext:
2259 converted = replace_extension(file, final_ext, ext)
2260 if os.path.exists(encodeFilename(converted)):
2261 existing_files.append(converted)
2262 if os.path.exists(encodeFilename(file)):
2263 existing_files.append(file)
2264
2265 if not existing_files or self.params.get('overwrites', False):
2266 for file in orderedSet(existing_files):
2267 self.report_file_delete(file)
2268 os.remove(encodeFilename(file))
2269 return None
2270
2271 self.report_file_already_downloaded(existing_files[0])
2272 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2273 return existing_files[0]
2274
2275 success = True
2276 if info_dict.get('requested_formats') is not None:
2277 downloaded = []
2278 merger = FFmpegMergerPP(self)
2279 if not merger.available:
2280 postprocessors = []
2281 self.report_warning('You have requested multiple '
2282 'formats but ffmpeg is not installed.'
2283 ' The formats won\'t be merged.')
2284 else:
2285 postprocessors = [merger]
2286
2287 def compatible_formats(formats):
2288 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2289 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2290 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2291 if len(video_formats) > 2 or len(audio_formats) > 2:
2292 return False
2293
2294 # Check extension
2295 exts = set(format.get('ext') for format in formats)
2296 COMPATIBLE_EXTS = (
2297 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2298 set(('webm',)),
2299 )
2300 for ext_sets in COMPATIBLE_EXTS:
2301 if ext_sets.issuperset(exts):
2302 return True
2303 # TODO: Check acodec/vcodec
2304 return False
2305
2306 requested_formats = info_dict['requested_formats']
2307 old_ext = info_dict['ext']
2308 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2309 info_dict['ext'] = 'mkv'
2310 self.report_warning(
2311 'Requested formats are incompatible for merge and will be merged into mkv.')
2312
2313 def correct_ext(filename):
2314 filename_real_ext = os.path.splitext(filename)[1][1:]
2315 filename_wo_ext = (
2316 os.path.splitext(filename)[0]
2317 if filename_real_ext == old_ext
2318 else filename)
2319 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2320
2321 # Ensure filename always has a correct extension for successful merge
2322 full_filename = correct_ext(full_filename)
2323 temp_filename = correct_ext(temp_filename)
2324 dl_filename = existing_file(full_filename, temp_filename)
2325 if dl_filename is None:
2326 for f in requested_formats:
2327 new_info = dict(info_dict)
2328 new_info.update(f)
2329 fname = prepend_extension(
2330 self.prepare_filepath(self.prepare_filename(new_info), 'temp'),
2331 'f%s' % f['format_id'], new_info['ext'])
2332 if not ensure_dir_exists(fname):
2333 return
2334 downloaded.append(fname)
2335 partial_success, real_download = dl(fname, new_info)
2336 success = success and partial_success
2337 info_dict['__postprocessors'] = postprocessors
2338 info_dict['__files_to_merge'] = downloaded
2339 # Even if there were no downloads, it is being merged only now
2340 info_dict['__real_download'] = True
2341 else:
2342 # Just a single file
2343 dl_filename = existing_file(full_filename, temp_filename)
2344 if dl_filename is None:
2345 success, real_download = dl(temp_filename, info_dict)
2346 info_dict['__real_download'] = real_download
2347
2348 dl_filename = dl_filename or temp_filename
2349 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2350
2351 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2352 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2353 return
2354 except (OSError, IOError) as err:
2355 raise UnavailableVideoError(err)
2356 except (ContentTooShortError, ) as err:
2357 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2358 return
2359
2360 if success and filename != '-':
2361 # Fixup content
2362 fixup_policy = self.params.get('fixup')
2363 if fixup_policy is None:
2364 fixup_policy = 'detect_or_warn'
2365
2366 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2367
2368 stretched_ratio = info_dict.get('stretched_ratio')
2369 if stretched_ratio is not None and stretched_ratio != 1:
2370 if fixup_policy == 'warn':
2371 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2372 info_dict['id'], stretched_ratio))
2373 elif fixup_policy == 'detect_or_warn':
2374 stretched_pp = FFmpegFixupStretchedPP(self)
2375 if stretched_pp.available:
2376 info_dict['__postprocessors'].append(stretched_pp)
2377 else:
2378 self.report_warning(
2379 '%s: Non-uniform pixel ratio (%s). %s'
2380 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2381 else:
2382 assert fixup_policy in ('ignore', 'never')
2383
2384 if (info_dict.get('requested_formats') is None
2385 and info_dict.get('container') == 'm4a_dash'
2386 and info_dict.get('ext') == 'm4a'):
2387 if fixup_policy == 'warn':
2388 self.report_warning(
2389 '%s: writing DASH m4a. '
2390 'Only some players support this container.'
2391 % info_dict['id'])
2392 elif fixup_policy == 'detect_or_warn':
2393 fixup_pp = FFmpegFixupM4aPP(self)
2394 if fixup_pp.available:
2395 info_dict['__postprocessors'].append(fixup_pp)
2396 else:
2397 self.report_warning(
2398 '%s: writing DASH m4a. '
2399 'Only some players support this container. %s'
2400 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2401 else:
2402 assert fixup_policy in ('ignore', 'never')
2403
2404 if (info_dict.get('protocol') == 'm3u8_native'
2405 or info_dict.get('protocol') == 'm3u8'
2406 and self.params.get('hls_prefer_native')):
2407 if fixup_policy == 'warn':
2408 self.report_warning('%s: malformed AAC bitstream detected.' % (
2409 info_dict['id']))
2410 elif fixup_policy == 'detect_or_warn':
2411 fixup_pp = FFmpegFixupM3u8PP(self)
2412 if fixup_pp.available:
2413 info_dict['__postprocessors'].append(fixup_pp)
2414 else:
2415 self.report_warning(
2416 '%s: malformed AAC bitstream detected. %s'
2417 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2418 else:
2419 assert fixup_policy in ('ignore', 'never')
2420
2421 try:
2422 self.post_process(dl_filename, info_dict, files_to_move)
2423 except PostProcessingError as err:
2424 self.report_error('Postprocessing: %s' % str(err))
2425 return
2426 try:
2427 for ph in self._post_hooks:
2428 ph(full_filename)
2429 except Exception as err:
2430 self.report_error('post hooks: %s' % str(err))
2431 return
2432 must_record_download_archive = True
2433
2434 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2435 self.record_download_archive(info_dict)
2436 max_downloads = self.params.get('max_downloads')
2437 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2438 raise MaxDownloadsReached()
2439
2440 def download(self, url_list):
2441 """Download a given list of URLs."""
2442 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2443 if (len(url_list) > 1
2444 and outtmpl != '-'
2445 and '%' not in outtmpl
2446 and self.params.get('max_downloads') != 1):
2447 raise SameFileError(outtmpl)
2448
2449 for url in url_list:
2450 try:
2451 # It also downloads the videos
2452 res = self.extract_info(
2453 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2454 except UnavailableVideoError:
2455 self.report_error('unable to download video')
2456 except MaxDownloadsReached:
2457 self.to_screen('[info] Maximum number of downloaded files reached')
2458 raise
2459 except ExistingVideoReached:
2460 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2461 raise
2462 except RejectedVideoReached:
2463 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2464 raise
2465 else:
2466 if self.params.get('dump_single_json', False):
2467 self.to_stdout(json.dumps(res))
2468
2469 return self._download_retcode
2470
2471 def download_with_info_file(self, info_filename):
2472 with contextlib.closing(fileinput.FileInput(
2473 [info_filename], mode='r',
2474 openhook=fileinput.hook_encoded('utf-8'))) as f:
2475 # FileInput doesn't have a read method, we can't call json.load
2476 info = self.filter_requested_info(json.loads('\n'.join(f)))
2477 try:
2478 self.process_ie_result(info, download=True)
2479 except DownloadError:
2480 webpage_url = info.get('webpage_url')
2481 if webpage_url is not None:
2482 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2483 return self.download([webpage_url])
2484 else:
2485 raise
2486 return self._download_retcode
2487
2488 @staticmethod
2489 def filter_requested_info(info_dict):
2490 fields_to_remove = ('requested_formats', 'requested_subtitles')
2491 return dict(
2492 (k, v) for k, v in info_dict.items()
2493 if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
2494
2495 def run_pp(self, pp, infodict, files_to_move={}):
2496 files_to_delete = []
2497 files_to_delete, infodict = pp.run(infodict)
2498 if not files_to_delete:
2499 return files_to_move, infodict
2500
2501 if self.params.get('keepvideo', False):
2502 for f in files_to_delete:
2503 files_to_move.setdefault(f, '')
2504 else:
2505 for old_filename in set(files_to_delete):
2506 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2507 try:
2508 os.remove(encodeFilename(old_filename))
2509 except (IOError, OSError):
2510 self.report_warning('Unable to remove downloaded original file')
2511 if old_filename in files_to_move:
2512 del files_to_move[old_filename]
2513 return files_to_move, infodict
2514
2515 def pre_process(self, ie_info):
2516 info = dict(ie_info)
2517 for pp in self._pps['beforedl']:
2518 info = self.run_pp(pp, info)[1]
2519 return info
2520
2521 def post_process(self, filename, ie_info, files_to_move={}):
2522 """Run all the postprocessors on the given file."""
2523 info = dict(ie_info)
2524 info['filepath'] = filename
2525
2526 for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2527 files_to_move, info = self.run_pp(pp, info, files_to_move)
2528 info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info, files_to_move)[1]
2529 for pp in self._pps['aftermove']:
2530 files_to_move, info = self.run_pp(pp, info, {})
2531
2532 def _make_archive_id(self, info_dict):
2533 video_id = info_dict.get('id')
2534 if not video_id:
2535 return
2536 # Future-proof against any change in case
2537 # and backwards compatibility with prior versions
2538 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2539 if extractor is None:
2540 url = str_or_none(info_dict.get('url'))
2541 if not url:
2542 return
2543 # Try to find matching extractor for the URL and take its ie_key
2544 for ie in self._ies:
2545 if ie.suitable(url):
2546 extractor = ie.ie_key()
2547 break
2548 else:
2549 return
2550 return '%s %s' % (extractor.lower(), video_id)
2551
2552 def in_download_archive(self, info_dict):
2553 fn = self.params.get('download_archive')
2554 if fn is None:
2555 return False
2556
2557 vid_id = self._make_archive_id(info_dict)
2558 if not vid_id:
2559 return False # Incomplete video information
2560
2561 return vid_id in self.archive
2562
2563 def record_download_archive(self, info_dict):
2564 fn = self.params.get('download_archive')
2565 if fn is None:
2566 return
2567 vid_id = self._make_archive_id(info_dict)
2568 assert vid_id
2569 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2570 archive_file.write(vid_id + '\n')
2571 self.archive.add(vid_id)
2572
2573 @staticmethod
2574 def format_resolution(format, default='unknown'):
2575 if format.get('vcodec') == 'none':
2576 return 'audio only'
2577 if format.get('resolution') is not None:
2578 return format['resolution']
2579 if format.get('height') is not None:
2580 if format.get('width') is not None:
2581 res = '%sx%s' % (format['width'], format['height'])
2582 else:
2583 res = '%sp' % format['height']
2584 elif format.get('width') is not None:
2585 res = '%dx?' % format['width']
2586 else:
2587 res = default
2588 return res
2589
    def _format_note(self, fdict):
        """Build a one-line human-readable note for a format dict.

        The note is assembled piece by piece (language, bitrates, codecs,
        fps, sample rate, filesize); most pieces emit a leading ', ' only
        when something has already been written, so the exact spacing and
        the order of the checks below are significant.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' glues the video bitrate (appended further below) to the codec
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # no usable video codec, but separate video/audio bitrates exist
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2645
2646 def _format_note_table(self, f):
2647 def join_fields(*vargs):
2648 return ', '.join((val for val in vargs if val != ''))
2649
2650 return join_fields(
2651 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2652 format_field(f, 'language', '[%s]'),
2653 format_field(f, 'format_note'),
2654 format_field(f, 'container', ignore=(None, f.get('ext'))),
2655 format_field(f, 'asr', '%5dHz'))
2656
2657 def list_formats(self, info_dict):
2658 formats = info_dict.get('formats', [info_dict])
2659 new_format = self.params.get('listformats_table', False)
2660 if new_format:
2661 table = [
2662 [
2663 format_field(f, 'format_id'),
2664 format_field(f, 'ext'),
2665 self.format_resolution(f),
2666 format_field(f, 'fps', '%d'),
2667 '|',
2668 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2669 format_field(f, 'tbr', '%4dk'),
2670 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2671 '|',
2672 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2673 format_field(f, 'vbr', '%4dk'),
2674 format_field(f, 'acodec', default='unknown').replace('none', ''),
2675 format_field(f, 'abr', '%3dk'),
2676 format_field(f, 'asr', '%5dHz'),
2677 self._format_note_table(f)]
2678 for f in formats
2679 if f.get('preference') is None or f['preference'] >= -1000]
2680 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2681 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2682 else:
2683 table = [
2684 [
2685 format_field(f, 'format_id'),
2686 format_field(f, 'ext'),
2687 self.format_resolution(f),
2688 self._format_note(f)]
2689 for f in formats
2690 if f.get('preference') is None or f['preference'] >= -1000]
2691 header_line = ['format code', 'extension', 'resolution', 'note']
2692
2693 # if len(formats) > 1:
2694 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2695 self.to_screen(
2696 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2697 header_line,
2698 table,
2699 delim=new_format,
2700 extraGap=(0 if new_format else 1),
2701 hideEmpty=new_format)))
2702
2703 def list_thumbnails(self, info_dict):
2704 thumbnails = info_dict.get('thumbnails')
2705 if not thumbnails:
2706 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2707 return
2708
2709 self.to_screen(
2710 '[info] Thumbnails for %s:' % info_dict['id'])
2711 self.to_screen(render_table(
2712 ['ID', 'width', 'height', 'URL'],
2713 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2714
2715 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2716 if not subtitles:
2717 self.to_screen('%s has no %s' % (video_id, name))
2718 return
2719 self.to_screen(
2720 'Available %s for %s:' % (name, video_id))
2721 self.to_screen(render_table(
2722 ['Language', 'formats'],
2723 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2724 for lang, formats in subtitles.items()]))
2725
2726 def urlopen(self, req):
2727 """ Start an HTTP download """
2728 if isinstance(req, compat_basestring):
2729 req = sanitized_Request(req)
2730 return self._opener.open(req, timeout=self._socket_timeout)
2731
    def print_debug_header(self):
        """Write verbose debug information (versions, encodings, proxies,
        exe versions, public IP) to the output.

        No-op unless the 'verbose' param is set.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack .encoding when replaced (e.g. under some IDEs)
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] yt-dlp version %s\n' % __version__)
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        try:
            # Best effort: report the git commit when running from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # Python 2 only: clear the pending exception state
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # e.g. 'CPython', or 'PyPy version 7.3.1' when version info exists
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect proxies from all opener handlers that expose them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            # NOTE(review): the version check below is unreachable because of
            # this return -- looks intentional (update check disabled), but
            # worth confirming before removing the dead code
            return
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2811
    def _setup_opener(self):
        """Create the urllib opener used for all HTTP(S) requests.

        Wires together cookie handling, per-request proxies, the HTTPS,
        redirect and data: handlers, and disables the file:// scheme for
        security. The result is stored in self._opener; the socket timeout
        is stored in self._socket_timeout.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds (10 minutes)
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookie jar only; nothing is persisted to disk
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicit empty proxy disables proxying entirely
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to proxies from the environment
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2864
2865 def encode(self, s):
2866 if isinstance(s, bytes):
2867 return s # Already encoded
2868
2869 try:
2870 return s.encode(self.get_encoding())
2871 except UnicodeEncodeError as err:
2872 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2873 raise
2874
2875 def get_encoding(self):
2876 encoding = self.params.get('encoding')
2877 if encoding is None:
2878 encoding = preferredencoding()
2879 return encoding
2880
    def _write_thumbnails(self, info_dict, filename):
        """Download the video's thumbnail(s) next to *filename*.

        Honors the 'writethumbnail' param (single thumbnail) and the
        'write_all_thumbnails' param (every thumbnail). Returns the list
        of thumbnail file paths present after the call (newly downloaded
        or already existing); failed downloads are warned about and skipped.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # uses the last list entry; presumably the best quality -- verify
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails') or []
        else:
            thumbnails = []

        ret = []
        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate names/messages only when writing several thumbnails
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            # Record the target path on the thumbnail dict for later stages
            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                ret.append(thumb_filename)
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(thumb_filename)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Best effort: a missing thumbnail should not abort the download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
        return ret