]> jfr.im git - yt-dlp.git/blob - youtube_dlc/YoutubeDL.py
Multiple output templates for different file types
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 OUTTMPL_TYPES,
53 determine_ext,
54 determine_protocol,
55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
58 DownloadError,
59 encode_compat_str,
60 encodeFilename,
61 error_to_compat_str,
62 ExistingVideoReached,
63 expand_path,
64 ExtractorError,
65 float_or_none,
66 format_bytes,
67 format_field,
68 formatSeconds,
69 GeoRestrictedError,
70 int_or_none,
71 iri_to_uri,
72 ISO3166Utils,
73 locked_file,
74 make_dir,
75 make_HTTPS_handler,
76 MaxDownloadsReached,
77 orderedSet,
78 PagedList,
79 parse_filesize,
80 PerRequestProxyHandler,
81 platform_name,
82 PostProcessingError,
83 preferredencoding,
84 prepend_extension,
85 register_socks_protocols,
86 render_table,
87 replace_extension,
88 RejectedVideoReached,
89 SameFileError,
90 sanitize_filename,
91 sanitize_path,
92 sanitize_url,
93 sanitized_Request,
94 std_headers,
95 str_or_none,
96 strftime_or_none,
97 subtitles_filename,
98 to_high_limit_path,
99 UnavailableVideoError,
100 url_basename,
101 version_tuple,
102 write_json_file,
103 write_string,
104 YoutubeDLCookieJar,
105 YoutubeDLCookieProcessor,
106 YoutubeDLHandler,
107 YoutubeDLRedirectHandler,
108 process_communicate_or_kill,
109 )
110 from .cache import Cache
111 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
112 from .extractor.openload import PhantomJSwrapper
113 from .downloader import get_suitable_downloader
114 from .downloader.rtmp import rtmpdump_version
115 from .postprocessor import (
116 FFmpegFixupM3u8PP,
117 FFmpegFixupM4aPP,
118 FFmpegFixupStretchedPP,
119 FFmpegMergerPP,
120 FFmpegPostProcessor,
121 # FFmpegSubtitlesConvertorPP,
122 get_postprocessor,
123 MoveFilesAfterDownloadPP,
124 )
125 from .version import __version__
126
127 if compat_os_name == 'nt':
128 import ctypes
129
130
131 class YoutubeDL(object):
132 """YoutubeDL class.
133
    134     YoutubeDL objects are the ones responsible for downloading the
    135     actual video file and writing it to disk if the user has requested
    136     it, among some other tasks. In most cases there should be one per
    137     program. As, given a video URL, the downloader doesn't know how to
    138     extract all the needed information (a task that InfoExtractors do), it
    139     has to pass the URL to one of them.
140
141 For this, YoutubeDL objects have a method that allows
142 InfoExtractors to be registered in a given order. When it is passed
143 a URL, the YoutubeDL object handles it to the first InfoExtractor it
144 finds that reports being able to handle it. The InfoExtractor extracts
145 all the information about the video or videos the URL refers to, and
146 YoutubeDL process the extracted information, possibly using a File
147 Downloader to download the video.
148
149 YoutubeDL objects accept a lot of parameters. In order not to saturate
150 the object constructor with arguments, it receives a dictionary of
151 options instead. These options are available through the params
152 attribute for the InfoExtractors to use. The YoutubeDL also
153 registers itself as the downloader in charge for the InfoExtractors
154 that are added to it, so this is a "mutual registration".
155
156 Available options:
157
158 username: Username for authentication purposes.
159 password: Password for authentication purposes.
160 videopassword: Password for accessing a video.
161 ap_mso: Adobe Pass multiple-system operator identifier.
162 ap_username: Multiple-system operator account username.
163 ap_password: Multiple-system operator account password.
164 usenetrc: Use netrc for authentication instead.
165 verbose: Print additional info to stdout.
166 quiet: Do not print messages to stdout.
167 no_warnings: Do not print out anything for warnings.
168 forceurl: Force printing final URL.
169 forcetitle: Force printing title.
170 forceid: Force printing ID.
171 forcethumbnail: Force printing thumbnail URL.
172 forcedescription: Force printing description.
173 forcefilename: Force printing final filename.
174 forceduration: Force printing duration.
175 forcejson: Force printing info_dict as JSON.
176 dump_single_json: Force printing the info_dict of the whole playlist
177 (or video) as a single JSON line.
178 force_write_download_archive: Force writing download archive regardless of
179 'skip_download' or 'simulate'.
180 simulate: Do not download the video files.
181 format: Video format code. see "FORMAT SELECTION" for more details.
182 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
183 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
184 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
185 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
186 outtmpl: Dictionary of templates for output names. Allowed keys
187 are 'default' and the keys of OUTTMPL_TYPES (in utils.py)
188 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
189 restrictfilenames: Do not allow "&" and spaces in file names
190 trim_file_name: Limit length of filename (extension excluded)
191 ignoreerrors: Do not stop on download errors
192 (Default True when running youtube-dlc,
193 but False when directly accessing YoutubeDL class)
194 force_generic_extractor: Force downloader to use the generic extractor
195 overwrites: Overwrite all video and metadata files if True,
196 overwrite only non-video files if None
197 and don't overwrite any file if False
198 playliststart: Playlist item to start at.
199 playlistend: Playlist item to end at.
200 playlist_items: Specific indices of playlist to download.
201 playlistreverse: Download playlist items in reverse order.
202 playlistrandom: Download playlist items in random order.
203 matchtitle: Download only matching titles.
204 rejecttitle: Reject downloads for matching titles.
205 logger: Log messages to a logging.Logger instance.
206 logtostderr: Log messages to stderr instead of stdout.
207 writedescription: Write the video description to a .description file
208 writeinfojson: Write the video description to a .info.json file
209 writecomments: Extract video comments. This will not be written to disk
210 unless writeinfojson is also given
211 writeannotations: Write the video annotations to a .annotations.xml file
212 writethumbnail: Write the thumbnail image to a file
    213     allow_playlist_files: Also write playlists' description, infojson etc in a separate file
214 write_all_thumbnails: Write all thumbnail formats to files
215 writelink: Write an internet shortcut file, depending on the
216 current platform (.url/.webloc/.desktop)
217 writeurllink: Write a Windows internet shortcut file (.url)
218 writewebloclink: Write a macOS internet shortcut file (.webloc)
219 writedesktoplink: Write a Linux internet shortcut file (.desktop)
220 writesubtitles: Write the video subtitles to a file
221 writeautomaticsub: Write the automatically generated subtitles to a file
222 allsubtitles: Downloads all the subtitles of the video
223 (requires writesubtitles or writeautomaticsub)
224 listsubtitles: Lists all available subtitles for the video
225 subtitlesformat: The format code for subtitles
226 subtitleslangs: List of languages of the subtitles to download
227 keepvideo: Keep the video file after post-processing
228 daterange: A DateRange object, download only if the upload_date is in the range.
229 skip_download: Skip the actual download of the video file
230 cachedir: Location of the cache files in the filesystem.
231 False to disable filesystem cache.
232 noplaylist: Download single video instead of a playlist if in doubt.
233 age_limit: An integer representing the user's age in years.
234 Unsuitable videos for the given age are skipped.
235 min_views: An integer representing the minimum view count the video
236 must have in order to not be skipped.
237 Videos without view count information are always
238 downloaded. None for no limit.
239 max_views: An integer representing the maximum view count.
240 Videos that are more popular than that are not
241 downloaded.
242 Videos without view count information are always
243 downloaded. None for no limit.
244 download_archive: File name of a file where all downloads are recorded.
245 Videos already present in the file are not downloaded
246 again.
247 break_on_existing: Stop the download process after attempting to download a
248 file that is in the archive.
249 break_on_reject: Stop the download process when encountering a video that
250 has been filtered out.
251 cookiefile: File name where cookies should be read from and dumped to
252 nocheckcertificate:Do not verify SSL certificates
253 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
254 At the moment, this is only supported by YouTube.
255 proxy: URL of the proxy server to use
256 geo_verification_proxy: URL of the proxy to use for IP address verification
257 on geo-restricted sites.
258 socket_timeout: Time to wait for unresponsive hosts, in seconds
259 bidi_workaround: Work around buggy terminals without bidirectional text
    260                        support, using fribidi
261 debug_printtraffic:Print out sent and received HTTP traffic
262 include_ads: Download ads as well
263 default_search: Prepend this string if an input url is not valid.
264 'auto' for elaborate guessing
265 encoding: Use this encoding instead of the system-specified.
266 extract_flat: Do not resolve URLs, return the immediate result.
267 Pass in 'in_playlist' to only show this behavior for
268 playlist items.
269 postprocessors: A list of dictionaries, each with an entry
270 * key: The name of the postprocessor. See
271 youtube_dlc/postprocessor/__init__.py for a list.
272 * _after_move: Optional. If True, run this post_processor
273 after 'MoveFilesAfterDownload'
274 as well as any further keyword arguments for the
275 postprocessor.
276 post_hooks: A list of functions that get called as the final step
277 for each video file, after all postprocessors have been
278 called. The filename will be passed as the only argument.
279 progress_hooks: A list of functions that get called on download
280 progress, with a dictionary with the entries
281 * status: One of "downloading", "error", or "finished".
282 Check this first and ignore unknown values.
283
284 If status is one of "downloading", or "finished", the
285 following properties may also be present:
286 * filename: The final filename (always present)
287 * tmpfilename: The filename we're currently writing to
288 * downloaded_bytes: Bytes on disk
289 * total_bytes: Size of the whole file, None if unknown
290 * total_bytes_estimate: Guess of the eventual file size,
291 None if unavailable.
292 * elapsed: The number of seconds since download started.
293 * eta: The estimated time in seconds, None if unknown
294 * speed: The download speed in bytes/second, None if
295 unknown
296 * fragment_index: The counter of the currently
297 downloaded video fragment.
298 * fragment_count: The number of fragments (= individual
299 files that will be merged)
300
301 Progress hooks are guaranteed to be called at least once
302 (with status "finished") if the download is successful.
303 merge_output_format: Extension to use when merging formats.
304 final_ext: Expected final extension; used to detect when the file was
305 already downloaded and converted. "merge_output_format" is
306 replaced by this extension when given
307 fixup: Automatically correct known faults of the file.
308 One of:
309 - "never": do nothing
310 - "warn": only emit a warning
311 - "detect_or_warn": check whether we can do anything
312 about it, warn otherwise (default)
313 source_address: Client-side IP address to bind to.
314 call_home: Boolean, true iff we are allowed to contact the
315 youtube-dlc servers for debugging.
316 sleep_interval: Number of seconds to sleep before each download when
317 used alone or a lower bound of a range for randomized
318 sleep before each download (minimum possible number
319 of seconds to sleep) when used along with
320 max_sleep_interval.
321 max_sleep_interval:Upper bound of a range for randomized sleep before each
322 download (maximum possible number of seconds to sleep).
323 Must only be used along with sleep_interval.
324 Actual sleep time will be a random float from range
325 [sleep_interval; max_sleep_interval].
326 listformats: Print an overview of available video formats and exit.
327 list_thumbnails: Print a table of all thumbnails and exit.
328 match_filter: A function that gets called with the info_dict of
329 every video.
330 If it returns a message, the video is ignored.
331 If it returns None, the video is downloaded.
332 match_filter_func in utils.py is one example for this.
333 no_color: Do not emit color codes in output.
334 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
335 HTTP header
336 geo_bypass_country:
337 Two-letter ISO 3166-2 country code that will be used for
338 explicit geographic restriction bypassing via faking
339 X-Forwarded-For HTTP header
340 geo_bypass_ip_block:
341 IP range in CIDR notation that will be used similarly to
342 geo_bypass_country
343
344 The following options determine which downloader is picked:
345 external_downloader: Executable of the external downloader to call.
346 None or unset for standard (built-in) downloader.
347 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
348 if True, otherwise use ffmpeg/avconv if False, otherwise
349 use downloader suggested by extractor if None.
350
351 The following parameters are not used by YoutubeDL itself, they are used by
352 the downloader (see youtube_dlc/downloader/common.py):
353 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
354 noresizebuffer, retries, continuedl, noprogress, consoletitle,
355 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
356 http_chunk_size.
357
358 The following options are used by the post processors:
359 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
360 otherwise prefer ffmpeg. (avconv support is deprecated)
361 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
362 to the binary or its containing directory.
363 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
364 and a list of additional command-line arguments for the
365 postprocessor/executable. The dict can also have "PP+EXE" keys
366 which are used when the given exe is used by the given PP.
367 Use 'default' as the name for arguments to passed to all PP
368 The following options are used by the Youtube extractor:
369 youtube_include_dash_manifest: If True (default), DASH manifests and related
370 data will be downloaded and processed by extractor.
371 You can reduce network I/O by disabling it if you don't
372 care about DASH.
373 """
374
375 _NUMERIC_FIELDS = set((
376 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
377 'timestamp', 'upload_year', 'upload_month', 'upload_day',
378 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
379 'average_rating', 'comment_count', 'age_limit',
380 'start_time', 'end_time',
381 'chapter_number', 'season_number', 'episode_number',
382 'track_number', 'disc_number', 'release_year',
383 'playlist_index',
384 ))
385
    # Class-level defaults; __init__ rebinds each of these as an instance
    # attribute, so instances do not normally share state through them.
    # NOTE(review): the list/dict/set values below are mutable class attributes
    # shared by every instance until shadowed in __init__ — never mutate them
    # at class level.
    params = None  # options dictionary (see class docstring for available keys)
    _ies = []  # registered InfoExtractor objects/classes
    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}  # post-processors per stage
    __prepare_filename_warned = False
    _download_retcode = None  # set to 1 by trouble() when a download fails
    _num_downloads = None  # number of files downloaded by this instance
    _playlist_level = 0  # presumably depth of nested playlist extraction — confirm in playlist code
    _playlist_urls = set()  # presumably playlist URLs seen, to avoid re-processing — confirm
    _screen_file = None  # stream used for screen output (stdout or stderr)
395
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    Options dictionary (see the class docstring for keys).
        auto_init: When True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
        self.__prepare_filename_warned = False
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when logtostderr is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        # NOTE(review): this bare string is a no-op statement, not a docstring
        # of the function below (it precedes the def instead of following it).
        """Preload the archive, if any is specified"""
        def preload_download_archive(self):
            # Load the download-archive file into self.archive, one ID per line.
            # Returns False when no archive is configured or the file is absent.
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is a real error
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            # Warn (once) when a deprecated option is present; True if it was.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        # Called with an explicit self because it is a plain nested function
        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if self.params.get('final_ext'):
            # final_ext (from --remux-video/--recode-video) supersedes
            # any user-supplied merge_output_format
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        # 'overwrites' = None means "default behavior": drop it so later code
        # can distinguish unset from explicit True/False
        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                # Route output through an external bidi filter (bidiv/fribidi)
                # over a pty so right-to-left text renders correctly
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # Fall back to fribidi when bidiv is not installed
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register configured post-processors; 'key' selects
        # the PP class and optional 'when' selects the stage it runs in
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            if 'when' in pp_def:
                when = pp_def['when']
                del pp_def['when']
            else:
                when = 'normal'
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
525
526 def warn_if_short_id(self, argv):
527 # short YouTube ID starting with dash?
528 idxs = [
529 i for i, a in enumerate(argv)
530 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
531 if idxs:
532 correct_argv = (
533 ['youtube-dlc']
534 + [a for i, a in enumerate(argv) if i not in idxs]
535 + ['--'] + [argv[i] for i in idxs]
536 )
537 self.report_warning(
538 'Long argument string detected. '
539 'Use -- to separate parameters and URLs, like this:\n%s\n' %
540 args_to_str(correct_argv))
541
542 def add_info_extractor(self, ie):
543 """Add an InfoExtractor object to the end of the list."""
544 self._ies.append(ie)
545 if not isinstance(ie, type):
546 self._ies_instances[ie.ie_key()] = ie
547 ie.set_downloader(self)
548
549 def get_info_extractor(self, ie_key):
550 """
551 Get an instance of an IE with name ie_key, it will try to get one from
552 the _ies list, if there's no instance it will create a new one and add
553 it to the extractor list.
554 """
555 ie = self._ies_instances.get(ie_key)
556 if ie is None:
557 ie = get_info_extractor(ie_key)()
558 self.add_info_extractor(ie)
559 return ie
560
561 def add_default_info_extractors(self):
562 """
563 Add the InfoExtractors returned by gen_extractors to the end of the list
564 """
565 for ie in gen_extractor_classes():
566 self.add_info_extractor(ie)
567
568 def add_post_processor(self, pp, when='normal'):
569 """Add a PostProcessor object to the end of the chain."""
570 self._pps[when].append(pp)
571 pp.set_downloader(self)
572
573 def add_post_hook(self, ph):
574 """Add the post hook"""
575 self._post_hooks.append(ph)
576
577 def add_progress_hook(self, ph):
578 """Add the progress hook (currently only for the file downloader)"""
579 self._progress_hooks.append(ph)
580
581 def _bidi_workaround(self, message):
582 if not hasattr(self, '_output_channel'):
583 return message
584
585 assert hasattr(self, '_output_process')
586 assert isinstance(message, compat_str)
587 line_count = message.count('\n') + 1
588 self._output_process.stdin.write((message + '\n').encode('utf-8'))
589 self._output_process.stdin.flush()
590 res = ''.join(self._output_channel.readline().decode('utf-8')
591 for _ in range(line_count))
592 return res[:-len('\n')]
593
594 def to_screen(self, message, skip_eol=False):
595 """Print message to stdout if not in quiet mode."""
596 return self.to_stdout(message, skip_eol, check_quiet=True)
597
598 def _write_string(self, s, out=None):
599 write_string(s, out=out, encoding=self.params.get('encoding'))
600
601 def to_stdout(self, message, skip_eol=False, check_quiet=False):
602 """Print message to stdout if not in quiet mode."""
603 if self.params.get('logger'):
604 self.params['logger'].debug(message)
605 elif not check_quiet or not self.params.get('quiet', False):
606 message = self._bidi_workaround(message)
607 terminator = ['\n', ''][skip_eol]
608 output = message + terminator
609
610 self._write_string(output, self._screen_file)
611
612 def to_stderr(self, message):
613 """Print message to stderr."""
614 assert isinstance(message, compat_str)
615 if self.params.get('logger'):
616 self.params['logger'].error(message)
617 else:
618 message = self._bidi_workaround(message)
619 output = message + '\n'
620 self._write_string(output, self._err_file)
621
622 def to_console_title(self, message):
623 if not self.params.get('consoletitle', False):
624 return
625 if compat_os_name == 'nt':
626 if ctypes.windll.kernel32.GetConsoleWindow():
627 # c_wchar_p() might not be necessary if `message` is
628 # already of type unicode()
629 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
630 elif 'TERM' in os.environ:
631 self._write_string('\033]0;%s\007' % message, self._screen_file)
632
633 def save_console_title(self):
634 if not self.params.get('consoletitle', False):
635 return
636 if self.params.get('simulate', False):
637 return
638 if compat_os_name != 'nt' and 'TERM' in os.environ:
639 # Save the title on stack
640 self._write_string('\033[22;0t', self._screen_file)
641
642 def restore_console_title(self):
643 if not self.params.get('consoletitle', False):
644 return
645 if self.params.get('simulate', False):
646 return
647 if compat_os_name != 'nt' and 'TERM' in os.environ:
648 # Restore the title from stack
649 self._write_string('\033[23;0t', self._screen_file)
650
651 def __enter__(self):
652 self.save_console_title()
653 return self
654
655 def __exit__(self, *args):
656 self.restore_console_title()
657
658 if self.params.get('cookiefile') is not None:
659 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
660
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # A wrapped exception may carry its own exc_info attribute;
                    # print that first so the original cause is visible
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an exception handler: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info, when present, so the
            # raised DownloadError points at the original cause
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record the failure in the exit code instead
        self._download_retcode = 1
690
691 def report_warning(self, message):
692 '''
693 Print the message to stderr, it will be prefixed with 'WARNING:'
694 If stderr is a tty file the 'WARNING:' will be colored
695 '''
696 if self.params.get('logger') is not None:
697 self.params['logger'].warning(message)
698 else:
699 if self.params.get('no_warnings'):
700 return
701 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
702 _msg_header = '\033[0;33mWARNING:\033[0m'
703 else:
704 _msg_header = 'WARNING:'
705 warning_message = '%s %s' % (_msg_header, message)
706 self.to_stderr(warning_message)
707
708 def report_error(self, message, tb=None):
709 '''
710 Do the same as trouble, but prefixes the message with 'ERROR:', colored
711 in red if stderr is a tty file.
712 '''
713 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
714 _msg_header = '\033[0;31mERROR:\033[0m'
715 else:
716 _msg_header = 'ERROR:'
717 error_message = '%s %s' % (_msg_header, message)
718 self.trouble(error_message, tb)
719
720 def report_file_already_downloaded(self, file_name):
721 """Report file has already been fully downloaded."""
722 try:
723 self.to_screen('[download] %s has already been downloaded' % file_name)
724 except UnicodeEncodeError:
725 self.to_screen('[download] The file has already been downloaded')
726
727 def report_file_delete(self, file_name):
728 """Report that existing file will be deleted."""
729 try:
730 self.to_screen('Deleting already existent file %s' % file_name)
731 except UnicodeEncodeError:
732 self.to_screen('Deleting already existent file')
733
734 def parse_outtmpl(self):
735 outtmpl_dict = self.params.get('outtmpl', {})
736 if not isinstance(outtmpl_dict, dict):
737 outtmpl_dict = {'default': outtmpl_dict}
738 outtmpl_dict.update({
739 k: v for k, v in DEFAULT_OUTTMPL.items()
740 if not outtmpl_dict.get(k)})
741 for key, val in outtmpl_dict.items():
742 if isinstance(val, bytes):
743 self.report_warning(
744 'Parameter outtmpl is bytes, but should be a unicode string. '
745 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
746 return outtmpl_dict
747
748 def _prepare_filename(self, info_dict, tmpl_type='default'):
749 try:
750 template_dict = dict(info_dict)
751
752 template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
753 formatSeconds(info_dict['duration'], '-')
754 if info_dict.get('duration', None) is not None
755 else None)
756
757 template_dict['epoch'] = int(time.time())
758 autonumber_size = self.params.get('autonumber_size')
759 if autonumber_size is None:
760 autonumber_size = 5
761 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
762 if template_dict.get('resolution') is None:
763 if template_dict.get('width') and template_dict.get('height'):
764 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
765 elif template_dict.get('height'):
766 template_dict['resolution'] = '%sp' % template_dict['height']
767 elif template_dict.get('width'):
768 template_dict['resolution'] = '%dx?' % template_dict['width']
769
770 sanitize = lambda k, v: sanitize_filename(
771 compat_str(v),
772 restricted=self.params.get('restrictfilenames'),
773 is_id=(k == 'id' or k.endswith('_id')))
774 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
775 for k, v in template_dict.items()
776 if v is not None and not isinstance(v, (list, tuple, dict)))
777 na = self.params.get('outtmpl_na_placeholder', 'NA')
778 template_dict = collections.defaultdict(lambda: na, template_dict)
779
780 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
781 force_ext = OUTTMPL_TYPES.get(tmpl_type)
782
783 # For fields playlist_index and autonumber convert all occurrences
784 # of %(field)s to %(field)0Nd for backward compatibility
785 field_size_compat_map = {
786 'playlist_index': len(str(template_dict['n_entries'])),
787 'autonumber': autonumber_size,
788 }
789 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
790 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
791 if mobj:
792 outtmpl = re.sub(
793 FIELD_SIZE_COMPAT_RE,
794 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
795 outtmpl)
796
797 # As of [1] format syntax is:
798 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
799 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
800 FORMAT_RE = r'''(?x)
801 (?<!%)
802 %
803 \({0}\) # mapping key
804 (?:[#0\-+ ]+)? # conversion flags (optional)
805 (?:\d+)? # minimum field width (optional)
806 (?:\.\d+)? # precision (optional)
807 [hlL]? # length modifier (optional)
808 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
809 '''
810
811 numeric_fields = list(self._NUMERIC_FIELDS)
812
813 # Format date
814 FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
815 for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
816 conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
817 if key in template_dict:
818 continue
819 value = strftime_or_none(template_dict.get(field), frmt, na)
820 if conv_type in 'crs': # string
821 value = sanitize(field, value)
822 else: # number
823 numeric_fields.append(key)
824 value = float_or_none(value, default=None)
825 if value is not None:
826 template_dict[key] = value
827
828 # Missing numeric fields used together with integer presentation types
829 # in format specification will break the argument substitution since
830 # string NA placeholder is returned for missing fields. We will patch
831 # output template for missing fields to meet string presentation type.
832 for numeric_field in numeric_fields:
833 if numeric_field not in template_dict:
834 outtmpl = re.sub(
835 FORMAT_RE.format(re.escape(numeric_field)),
836 r'%({0})s'.format(numeric_field), outtmpl)
837
838 # expand_path translates '%%' into '%' and '$$' into '$'
839 # correspondingly that is not what we want since we need to keep
840 # '%%' intact for template dict substitution step. Working around
841 # with boundary-alike separator hack.
842 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
843 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
844
845 # outtmpl should be expand_path'ed before template dict substitution
846 # because meta fields may contain env variables we don't want to
847 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
848 # title "Hello $PATH", we don't want `$PATH` to be expanded.
849 filename = expand_path(outtmpl).replace(sep, '') % template_dict
850
851 if force_ext is not None:
852 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
853
854 # https://github.com/blackjack4494/youtube-dlc/issues/85
855 trim_file_name = self.params.get('trim_file_name', False)
856 if trim_file_name:
857 fn_groups = filename.rsplit('.')
858 ext = fn_groups[-1]
859 sub_ext = ''
860 if len(fn_groups) > 2:
861 sub_ext = fn_groups[-2]
862 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
863
864 # Temporary fix for #4787
865 # 'Treat' all problem characters by passing filename through preferredencoding
866 # to workaround encoding issues with subprocess on python2 @ Windows
867 if sys.version_info < (3, 0) and sys.platform == 'win32':
868 filename = encodeFilename(filename, True).decode(preferredencoding())
869 filename = sanitize_path(filename)
870
871 return filename
872 except ValueError as err:
873 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
874 return None
875
876 def prepare_filename(self, info_dict, dir_type='', warn=False):
877 """Generate the output filename."""
878 paths = self.params.get('paths', {})
879 assert isinstance(paths, dict)
880 filename = self._prepare_filename(info_dict, dir_type or 'default')
881
882 if warn and not self.__prepare_filename_warned:
883 if not paths:
884 pass
885 elif filename == '-':
886 self.report_warning('--paths is ignored when an outputting to stdout')
887 elif os.path.isabs(filename):
888 self.report_warning('--paths is ignored since an absolute path is given in output template')
889 self.__prepare_filename_warned = True
890 if filename == '-' or not filename:
891 return filename
892
893 homepath = expand_path(paths.get('home', '').strip())
894 assert isinstance(homepath, compat_str)
895 subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
896 assert isinstance(subdir, compat_str)
897 return sanitize_path(os.path.join(homepath, subdir, filename))
898
899 def _match_entry(self, info_dict, incomplete):
900 """ Returns None if the file should be downloaded """
901
902 def check_filter():
903 video_title = info_dict.get('title', info_dict.get('id', 'video'))
904 if 'title' in info_dict:
905 # This can happen when we're just evaluating the playlist
906 title = info_dict['title']
907 matchtitle = self.params.get('matchtitle', False)
908 if matchtitle:
909 if not re.search(matchtitle, title, re.IGNORECASE):
910 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
911 rejecttitle = self.params.get('rejecttitle', False)
912 if rejecttitle:
913 if re.search(rejecttitle, title, re.IGNORECASE):
914 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
915 date = info_dict.get('upload_date')
916 if date is not None:
917 dateRange = self.params.get('daterange', DateRange())
918 if date not in dateRange:
919 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
920 view_count = info_dict.get('view_count')
921 if view_count is not None:
922 min_views = self.params.get('min_views')
923 if min_views is not None and view_count < min_views:
924 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
925 max_views = self.params.get('max_views')
926 if max_views is not None and view_count > max_views:
927 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
928 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
929 return 'Skipping "%s" because it is age restricted' % video_title
930 if self.in_download_archive(info_dict):
931 return '%s has already been recorded in archive' % video_title
932
933 if not incomplete:
934 match_filter = self.params.get('match_filter')
935 if match_filter is not None:
936 ret = match_filter(info_dict)
937 if ret is not None:
938 return ret
939 return None
940
941 reason = check_filter()
942 if reason is not None:
943 self.to_screen('[download] ' + reason)
944 if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
945 raise ExistingVideoReached()
946 elif self.params.get('break_on_reject', False):
947 raise RejectedVideoReached()
948 return reason
949
950 @staticmethod
951 def add_extra_info(info_dict, extra_info):
952 '''Set the keys from extra_info in info dict if they are missing'''
953 for key, value in extra_info.items():
954 info_dict.setdefault(key, value)
955
    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Extract (and optionally process/download) the video(s) at url.

        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.

        ie_key                  -- only try the extractor with this key
        info_dict               -- previously known metadata; its id/title are
                                   merged into the result by __extract_info
        extra_info              -- dict containing the extra values to add to each result
        process                 -- resolve the result via process_ie_result
        force_generic_extractor -- use the 'Generic' extractor when no ie_key given

        NOTE(review): extra_info uses a mutable default argument; it is only
        passed through here, but callers must not mutate it.
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        # Candidate extractors: either the single requested one, or all of them.
        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # Re-resolve through get_info_extractor — presumably to get the
            # instance registered with this YoutubeDL; confirm against
            # get_info_extractor's definition.
            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Try to derive the video id from the URL alone, so the download
            # archive can be consulted before any network request is made.
            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break
            return self.__extract_info(url, ie, download, extra_info, process, info_dict)
        else:
            # for/else: reached only when no extractor accepted the URL
            # (the loop neither returned nor hit the archive break).
            self.report_error('no suitable InfoExtractor for URL %s' % url)
995
996 def __handle_extraction_exceptions(func):
997 def wrapper(self, *args, **kwargs):
998 try:
999 return func(self, *args, **kwargs)
1000 except GeoRestrictedError as e:
1001 msg = e.msg
1002 if e.countries:
1003 msg += '\nThis video is available in %s.' % ', '.join(
1004 map(ISO3166Utils.short2full, e.countries))
1005 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1006 self.report_error(msg)
1007 except ExtractorError as e: # An error we somewhat expected
1008 self.report_error(compat_str(e), e.format_traceback())
1009 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1010 raise
1011 except Exception as e:
1012 if self.params.get('ignoreerrors', False):
1013 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1014 else:
1015 raise
1016 return wrapper
1017
1018 @__handle_extraction_exceptions
1019 def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1020 ie_result = ie.extract(url)
1021 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1022 return
1023 if isinstance(ie_result, list):
1024 # Backwards compatibility: old IE result format
1025 ie_result = {
1026 '_type': 'compat_list',
1027 'entries': ie_result,
1028 }
1029 if info_dict:
1030 if info_dict.get('id'):
1031 ie_result['id'] = info_dict['id']
1032 if info_dict.get('title'):
1033 ie_result['title'] = info_dict['title']
1034 self.add_default_extra_info(ie_result, ie, url)
1035 if process:
1036 return self.process_ie_result(ie_result, download, extra_info)
1037 else:
1038 return ie_result
1039
1040 def add_default_extra_info(self, ie_result, ie, url):
1041 self.add_extra_info(ie_result, {
1042 'extractor': ie.IE_NAME,
1043 'webpage_url': url,
1044 'webpage_url_basename': url_basename(url),
1045 'extractor_key': ie.ie_key(),
1046 })
1047
1048 def process_ie_result(self, ie_result, download=True, extra_info={}):
1049 """
1050 Take the result of the ie(may be modified) and resolve all unresolved
1051 references (URLs, playlist items).
1052
1053 It will also download the videos if 'download'.
1054 Returns the resolved ie_result.
1055 """
1056 result_type = ie_result.get('_type', 'video')
1057
1058 if result_type in ('url', 'url_transparent'):
1059 ie_result['url'] = sanitize_url(ie_result['url'])
1060 extract_flat = self.params.get('extract_flat', False)
1061 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1062 or extract_flat is True):
1063 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1064 return ie_result
1065
1066 if result_type == 'video':
1067 self.add_extra_info(ie_result, extra_info)
1068 return self.process_video_result(ie_result, download=download)
1069 elif result_type == 'url':
1070 # We have to add extra_info to the results because it may be
1071 # contained in a playlist
1072 return self.extract_info(ie_result['url'],
1073 download, info_dict=ie_result,
1074 ie_key=ie_result.get('ie_key'),
1075 extra_info=extra_info)
1076 elif result_type == 'url_transparent':
1077 # Use the information from the embedding page
1078 info = self.extract_info(
1079 ie_result['url'], ie_key=ie_result.get('ie_key'),
1080 extra_info=extra_info, download=False, process=False)
1081
1082 # extract_info may return None when ignoreerrors is enabled and
1083 # extraction failed with an error, don't crash and return early
1084 # in this case
1085 if not info:
1086 return info
1087
1088 force_properties = dict(
1089 (k, v) for k, v in ie_result.items() if v is not None)
1090 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1091 if f in force_properties:
1092 del force_properties[f]
1093 new_result = info.copy()
1094 new_result.update(force_properties)
1095
1096 # Extracted info may not be a video result (i.e.
1097 # info.get('_type', 'video') != video) but rather an url or
1098 # url_transparent. In such cases outer metadata (from ie_result)
1099 # should be propagated to inner one (info). For this to happen
1100 # _type of info should be overridden with url_transparent. This
1101 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1102 if new_result.get('_type') == 'url':
1103 new_result['_type'] = 'url_transparent'
1104
1105 return self.process_ie_result(
1106 new_result, download=download, extra_info=extra_info)
1107 elif result_type in ('playlist', 'multi_video'):
1108 # Protect from infinite recursion due to recursively nested playlists
1109 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1110 webpage_url = ie_result['webpage_url']
1111 if webpage_url in self._playlist_urls:
1112 self.to_screen(
1113 '[download] Skipping already downloaded playlist: %s'
1114 % ie_result.get('title') or ie_result.get('id'))
1115 return
1116
1117 self._playlist_level += 1
1118 self._playlist_urls.add(webpage_url)
1119 try:
1120 return self.__process_playlist(ie_result, download)
1121 finally:
1122 self._playlist_level -= 1
1123 if not self._playlist_level:
1124 self._playlist_urls.clear()
1125 elif result_type == 'compat_list':
1126 self.report_warning(
1127 'Extractor %s returned a compat_list result. '
1128 'It needs to be updated.' % ie_result.get('extractor'))
1129
1130 def _fixup(r):
1131 self.add_extra_info(
1132 r,
1133 {
1134 'extractor': ie_result['extractor'],
1135 'webpage_url': ie_result['webpage_url'],
1136 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1137 'extractor_key': ie_result['extractor_key'],
1138 }
1139 )
1140 return r
1141 ie_result['entries'] = [
1142 self.process_ie_result(_fixup(r), download, extra_info)
1143 for r in ie_result['entries']
1144 ]
1145 return ie_result
1146 else:
1147 raise Exception('Invalid result type: %s' % result_type)
1148
    def __process_playlist(self, ie_result, download):
        """Resolve, filter and (optionally) download every entry of a playlist.

        Writes the playlist-level info-json/description files when enabled,
        selects the requested slice of entries, then processes each entry
        with per-entry extra_info. Returns ie_result with 'entries' replaced
        by the processed results.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if self.params.get('allow_playlist_files', True):
            # Copy used only for filename templating; playlist_index 0 marks
            # the playlist-level files themselves.
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0
            }
            ie_copy.update(dict(ie_result))

            def ensure_dir_exists(path):
                return make_dir(path, self.report_error)

            if self.params.get('writeinfojson', False):
                infofn = self.prepare_filename(ie_copy, 'pl_infojson')
                if not ensure_dir_exists(encodeFilename(infofn)):
                    return
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                    self.to_screen('[info] Playlist metadata is already present')
                else:
                    playlist_info = dict(ie_result)
                    # 'entries' may be a generator which should not be resolved here,
                    # so it is dropped from the written metadata instead of copied.
                    del playlist_info['entries']
                    self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                    try:
                        write_json_file(self.filter_requested_info(playlist_info), infofn)
                    except (OSError, IOError):
                        self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

            if self.params.get('writedescription', False):
                descfn = self.prepare_filename(ie_copy, 'pl_description')
                if not ensure_dir_exists(encodeFilename(descfn)):
                    return
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                    self.to_screen('[info] Playlist description is already present')
                elif ie_result.get('description') is None:
                    self.report_warning('There\'s no playlist description to write.')
                else:
                    try:
                        self.to_screen('[info] Writing playlist description to: ' + descfn)
                        with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                            descfile.write(ie_result['description'])
                    except (OSError, IOError):
                        self.report_error('Cannot write playlist description file ' + descfn)
                        return

        playlist_results = []

        # --playlist-start is 1-based on the CLI; convert to 0-based slice start.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Parse '1,3,5-7' style specs into an ordered, de-duplicated list.
            # NOTE(review): 'format' shadows the builtin; kept as-is here.
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Select the 1-based playlistitems indices, silently dropping
            # indices outside [-len, len).
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Three entry sources: a fully materialized list, a lazily paged
        # PagedList, or an arbitrary (possibly infinite) iterable.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Only pull as many items as the highest requested index.
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level metadata each entry inherits (see add_extra_info).
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Apply matchtitle/daterange/archive/etc. filters before download.
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1304
1305 @__handle_extraction_exceptions
1306 def __process_iterable_entry(self, entry, download, extra_info):
1307 return self.process_ie_result(
1308 entry, download=download, extra_info=extra_info)
1309
1310 def _build_format_filter(self, filter_spec):
1311 " Returns a function to filter the formats according to the filter_spec "
1312
1313 OPERATORS = {
1314 '<': operator.lt,
1315 '<=': operator.le,
1316 '>': operator.gt,
1317 '>=': operator.ge,
1318 '=': operator.eq,
1319 '!=': operator.ne,
1320 }
1321 operator_rex = re.compile(r'''(?x)\s*
1322 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1323 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1324 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1325 $
1326 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1327 m = operator_rex.search(filter_spec)
1328 if m:
1329 try:
1330 comparison_value = int(m.group('value'))
1331 except ValueError:
1332 comparison_value = parse_filesize(m.group('value'))
1333 if comparison_value is None:
1334 comparison_value = parse_filesize(m.group('value') + 'B')
1335 if comparison_value is None:
1336 raise ValueError(
1337 'Invalid value %r in format specification %r' % (
1338 m.group('value'), filter_spec))
1339 op = OPERATORS[m.group('op')]
1340
1341 if not m:
1342 STR_OPERATORS = {
1343 '=': operator.eq,
1344 '^=': lambda attr, value: attr.startswith(value),
1345 '$=': lambda attr, value: attr.endswith(value),
1346 '*=': lambda attr, value: value in attr,
1347 }
1348 str_operator_rex = re.compile(r'''(?x)
1349 \s*(?P<key>[a-zA-Z0-9._-]+)
1350 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1351 \s*(?P<value>[a-zA-Z0-9._-]+)
1352 \s*$
1353 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1354 m = str_operator_rex.search(filter_spec)
1355 if m:
1356 comparison_value = m.group('value')
1357 str_op = STR_OPERATORS[m.group('op')]
1358 if m.group('negation'):
1359 op = lambda attr, value: not str_op(attr, value)
1360 else:
1361 op = str_op
1362
1363 if not m:
1364 raise ValueError('Invalid filter specification %r' % filter_spec)
1365
1366 def _filter(f):
1367 actual_value = f.get(m.group('key'))
1368 if actual_value is None:
1369 return m.group('none_inclusive')
1370 return op(actual_value, comparison_value)
1371 return _filter
1372
1373 def _default_format_spec(self, info_dict, download=True):
1374
1375 def can_merge():
1376 merger = FFmpegMergerPP(self)
1377 return merger.available and merger.can_merge()
1378
1379 prefer_best = (
1380 not self.params.get('simulate', False)
1381 and download
1382 and (
1383 not can_merge()
1384 or info_dict.get('is_live', False)
1385 or self.outtmpl_dict['default'] == '-'))
1386
1387 return (
1388 'best/bestvideo+bestaudio'
1389 if prefer_best
1390 else 'bestvideo*+bestaudio/best'
1391 if not self.params.get('allow_multiple_audio_streams', False)
1392 else 'bestvideo+bestaudio/best')
1393
    def build_format_selector(self, format_spec):
        """Compile a format-spec string (e.g. 'bestvideo+bestaudio/best')
        into a selector function: ctx dict -> iterable of chosen formats.

        The spec is tokenized with Python's tokenizer, parsed into a tree of
        FormatSelector nodes and compiled bottom-up into nested closures.
        Raises SyntaxError (via syntax_error) on malformed specs.
        """
        def syntax_error(note, start):
            # start is a (row, col) tokenizer position; col aligns the caret.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node kinds of the parsed selector tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw filter text.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Fuse adjacent NAME/NUMBER/other-OP tokens into one name.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of one comma-separated selector list.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare '[filter]' implicitly filters 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a parse node (or list of alternatives) into a generator
            # function taking the selection context dict.
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # First alternative yielding any format wins.
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector if selector.selector is not None else 'best'

                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if formats:
                            for f in formats:
                                yield f

                else:
                    format_fallback = False
                    # best/worst with optional audio/video qualifier and '*'
                    # (e.g. b, bv, ba*, worstvideo).
                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                    if format_spec_obj is not None:
                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                        not_format_type = 'v' if format_type == 'a' else 'a'
                        format_modified = format_spec_obj.group(3) is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                    if format_type  # bv, ba, wv, wa
                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                    if not format_modified  # b, w
                                    else None)  # b*, w*
                    else:
                        # Otherwise the atom is an extension or a format_id.
                        format_idx = -1
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if matches:
                            yield matches[format_idx]
                        elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            yield formats[format_idx]

            elif selector.type == MERGE:  # +
                def _merge(formats_pair):
                    # Combine a (video, audio) pair into one synthetic format
                    # with 'requested_formats' set for the downloader/merger.
                    format_1, format_2 = formats_pair

                    formats_info = []
                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
                    formats_info.extend(format_2.get('requested_formats', (format_2,)))

                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        # Drop duplicate audio/video streams when multiples
                        # are not allowed, keeping the first of each kind.
                        get_no_more = {"video": False, "audio": False}
                        for (i, fmt_info) in enumerate(formats_info):
                            for aud_vid in ["audio", "video"]:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        formats_info.pop(i)
                                    get_no_more[aud_vid] = True

                    if len(formats_info) == 1:
                        return formats_info[0]

                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                    output_ext = self.params.get('merge_output_format')
                    if not output_ext:
                        if the_only_video:
                            output_ext = the_only_video['ext']
                        elif the_only_audio and not video_fmts:
                            output_ext = the_only_audio['ext']
                        else:
                            output_ext = 'mkv'

                    new_dict = {
                        'requested_formats': formats_info,
                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                        'ext': output_ext,
                    }

                    if the_only_video:
                        new_dict.update({
                            'width': the_only_video.get('width'),
                            'height': the_only_video.get('height'),
                            'resolution': the_only_video.get('resolution'),
                            'fps': the_only_video.get('fps'),
                            'vcodec': the_only_video.get('vcodec'),
                            'vbr': the_only_video.get('vbr'),
                            'stretched_ratio': the_only_video.get('stretched_ratio'),
                        })

                    if the_only_audio:
                        new_dict.update({
                            'acodec': the_only_audio.get('acodec'),
                            'abr': the_only_audio.get('abr'),
                        })

                    return new_dict

                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Cartesian product of both sides; deep copies keep the
                    # two sub-selectors from interfering with each other.
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the node's [filters] to the context before selecting.
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-token pushback (restore_last_token),
            # needed by the recursive parser above.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1681
1682 def _calc_headers(self, info_dict):
1683 res = std_headers.copy()
1684
1685 add_headers = info_dict.get('http_headers')
1686 if add_headers:
1687 res.update(add_headers)
1688
1689 cookies = self._calc_cookies(info_dict)
1690 if cookies:
1691 res['Cookie'] = cookies
1692
1693 if 'X-Forwarded-For' not in res:
1694 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1695 if x_forwarded_for_ip:
1696 res['X-Forwarded-For'] = x_forwarded_for_ip
1697
1698 return res
1699
1700 def _calc_cookies(self, info_dict):
1701 pr = sanitized_Request(info_dict['url'])
1702 self.cookiejar.add_cookie_header(pr)
1703 return pr.get_header('Cookie')
1704
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result and select its formats.

        Coerces mistyped fields, normalizes thumbnails/subtitles/format
        entries, runs the format selector over the available formats and,
        when `download` is true, hands each selected format to process_info().
        Returns info_dict updated with the last selected format (kept for
        backward compatibility), or None for the pure listing modes.
        """
        assert info_dict.get('_type', 'video') == 'video'

        # 'id' and 'title' are the only hard requirements on an extractor result.
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # A mistyped field is an extractor bug; warn loudly before coercing.
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value (e.g. a numeric id) to compat_str in place.
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to an int (None on failure).
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: promote a lone 'thumbnail' into the list,
        # sort worst-to-best, and fill in sanitized URLs / resolution / ids.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        # After the sort above the last thumbnail is the preferred one.
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and derive missing extensions.
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        def is_wellformed(f):
            # A format without a URL cannot be downloaded; drop it with a warning.
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self.to_screen('[debug] Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1942
1943 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1944 """Select the requested subtitles and their format"""
1945 available_subs = {}
1946 if normal_subtitles and self.params.get('writesubtitles'):
1947 available_subs.update(normal_subtitles)
1948 if automatic_captions and self.params.get('writeautomaticsub'):
1949 for lang, cap_info in automatic_captions.items():
1950 if lang not in available_subs:
1951 available_subs[lang] = cap_info
1952
1953 if (not self.params.get('writesubtitles') and not
1954 self.params.get('writeautomaticsub') or not
1955 available_subs):
1956 return None
1957
1958 if self.params.get('allsubtitles', False):
1959 requested_langs = available_subs.keys()
1960 else:
1961 if self.params.get('subtitleslangs', False):
1962 requested_langs = self.params.get('subtitleslangs')
1963 elif 'en' in available_subs:
1964 requested_langs = ['en']
1965 else:
1966 requested_langs = [list(available_subs.keys())[0]]
1967
1968 formats_query = self.params.get('subtitlesformat', 'best')
1969 formats_preference = formats_query.split('/') if formats_query else []
1970 subs = {}
1971 for lang in requested_langs:
1972 formats = available_subs.get(lang)
1973 if formats is None:
1974 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1975 continue
1976 for ext in formats_preference:
1977 if ext == 'best':
1978 f = formats[-1]
1979 break
1980 matches = list(filter(lambda f: f['ext'] == ext, formats))
1981 if matches:
1982 f = matches[-1]
1983 break
1984 else:
1985 f = formats[-1]
1986 self.report_warning(
1987 'No subtitle format found matching "%s" for language %s, '
1988 'using %s' % (formats_query, lang, f['ext']))
1989 subs[lang] = f
1990 return subs
1991
1992 def __forced_printings(self, info_dict, filename, incomplete):
1993 def print_mandatory(field):
1994 if (self.params.get('force%s' % field, False)
1995 and (not incomplete or info_dict.get(field) is not None)):
1996 self.to_stdout(info_dict[field])
1997
1998 def print_optional(field):
1999 if (self.params.get('force%s' % field, False)
2000 and info_dict.get(field) is not None):
2001 self.to_stdout(info_dict[field])
2002
2003 print_mandatory('title')
2004 print_mandatory('id')
2005 if self.params.get('forceurl', False) and not incomplete:
2006 if info_dict.get('requested_formats') is not None:
2007 for f in info_dict['requested_formats']:
2008 self.to_stdout(f['url'] + f.get('play_path', ''))
2009 else:
2010 # For RTMP URLs, also include the playpath
2011 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2012 print_optional('thumbnail')
2013 print_optional('description')
2014 if self.params.get('forcefilename', False) and filename is not None:
2015 self.to_stdout(filename)
2016 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2017 self.to_stdout(formatSeconds(info_dict['duration']))
2018 print_mandatory('format')
2019 if self.params.get('forcejson', False):
2020 self.to_stdout(json.dumps(info_dict))
2021
2022 def process_info(self, info_dict):
2023 """Process a single resolved IE result."""
2024
2025 assert info_dict.get('_type', 'video') == 'video'
2026
2027 info_dict.setdefault('__postprocessors', [])
2028
2029 max_downloads = self.params.get('max_downloads')
2030 if max_downloads is not None:
2031 if self._num_downloads >= int(max_downloads):
2032 raise MaxDownloadsReached()
2033
2034 # TODO: backward compatibility, to be removed
2035 info_dict['fulltitle'] = info_dict['title']
2036
2037 if 'format' not in info_dict:
2038 info_dict['format'] = info_dict['ext']
2039
2040 if self._match_entry(info_dict, incomplete=False) is not None:
2041 return
2042
2043 self._num_downloads += 1
2044
2045 info_dict = self.pre_process(info_dict)
2046
2047 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2048 temp_filename = self.prepare_filename(info_dict, 'temp')
2049 files_to_move = {}
2050 skip_dl = self.params.get('skip_download', False)
2051
2052 # Forced printings
2053 self.__forced_printings(info_dict, full_filename, incomplete=False)
2054
2055 if self.params.get('simulate', False):
2056 if self.params.get('force_write_download_archive', False):
2057 self.record_download_archive(info_dict)
2058
2059 # Do nothing else if in simulate mode
2060 return
2061
2062 if full_filename is None:
2063 return
2064
2065 def ensure_dir_exists(path):
2066 return make_dir(path, self.report_error)
2067
2068 if not ensure_dir_exists(encodeFilename(full_filename)):
2069 return
2070 if not ensure_dir_exists(encodeFilename(temp_filename)):
2071 return
2072
2073 if self.params.get('writedescription', False):
2074 descfn = self.prepare_filename(info_dict, 'description')
2075 if not ensure_dir_exists(encodeFilename(descfn)):
2076 return
2077 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2078 self.to_screen('[info] Video description is already present')
2079 elif info_dict.get('description') is None:
2080 self.report_warning('There\'s no description to write.')
2081 else:
2082 try:
2083 self.to_screen('[info] Writing video description to: ' + descfn)
2084 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2085 descfile.write(info_dict['description'])
2086 except (OSError, IOError):
2087 self.report_error('Cannot write description file ' + descfn)
2088 return
2089
2090 if self.params.get('writeannotations', False):
2091 annofn = self.prepare_filename(info_dict, 'annotation')
2092 if not ensure_dir_exists(encodeFilename(annofn)):
2093 return
2094 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2095 self.to_screen('[info] Video annotations are already present')
2096 elif not info_dict.get('annotations'):
2097 self.report_warning('There are no annotations to write.')
2098 else:
2099 try:
2100 self.to_screen('[info] Writing video annotations to: ' + annofn)
2101 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2102 annofile.write(info_dict['annotations'])
2103 except (KeyError, TypeError):
2104 self.report_warning('There are no annotations to write.')
2105 except (OSError, IOError):
2106 self.report_error('Cannot write annotations file: ' + annofn)
2107 return
2108
2109 def dl(name, info, subtitle=False):
2110 fd = get_suitable_downloader(info, self.params)(self, self.params)
2111 for ph in self._progress_hooks:
2112 fd.add_progress_hook(ph)
2113 if self.params.get('verbose'):
2114 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2115 return fd.download(name, info, subtitle)
2116
2117 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2118 self.params.get('writeautomaticsub')])
2119
2120 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2121 # subtitles download errors are already managed as troubles in relevant IE
2122 # that way it will silently go on when used with unsupporting IE
2123 subtitles = info_dict['requested_subtitles']
2124 # ie = self.get_info_extractor(info_dict['extractor_key'])
2125 for sub_lang, sub_info in subtitles.items():
2126 sub_format = sub_info['ext']
2127 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2128 sub_filename = subtitles_filename(
2129 temp_filename if not skip_dl else sub_fn,
2130 sub_lang, sub_format, info_dict.get('ext'))
2131 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
2132 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2133 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2134 files_to_move[sub_filename] = sub_filename_final
2135 else:
2136 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2137 if sub_info.get('data') is not None:
2138 try:
2139 # Use newline='' to prevent conversion of newline characters
2140 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2141 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2142 subfile.write(sub_info['data'])
2143 files_to_move[sub_filename] = sub_filename_final
2144 except (OSError, IOError):
2145 self.report_error('Cannot write subtitles file ' + sub_filename)
2146 return
2147 else:
2148 try:
2149 dl(sub_filename, sub_info, subtitle=True)
2150 '''
2151 if self.params.get('sleep_interval_subtitles', False):
2152 dl(sub_filename, sub_info)
2153 else:
2154 sub_data = ie._request_webpage(
2155 sub_info['url'], info_dict['id'], note=False).read()
2156 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2157 subfile.write(sub_data)
2158 '''
2159 files_to_move[sub_filename] = sub_filename_final
2160 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2161 self.report_warning('Unable to download subtitle for "%s": %s' %
2162 (sub_lang, error_to_compat_str(err)))
2163 continue
2164
2165 if skip_dl:
2166 if self.params.get('convertsubtitles', False):
2167 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2168 filename_real_ext = os.path.splitext(full_filename)[1][1:]
2169 filename_wo_ext = (
2170 os.path.splitext(full_filename)[0]
2171 if filename_real_ext == info_dict['ext']
2172 else full_filename)
2173 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2174 # if subconv.available:
2175 # info_dict['__postprocessors'].append(subconv)
2176 if os.path.exists(encodeFilename(afilename)):
2177 self.to_screen(
2178 '[download] %s has already been downloaded and '
2179 'converted' % afilename)
2180 else:
2181 try:
2182 self.post_process(full_filename, info_dict, files_to_move)
2183 except PostProcessingError as err:
2184 self.report_error('Postprocessing: %s' % str(err))
2185 return
2186
2187 if self.params.get('writeinfojson', False):
2188 infofn = self.prepare_filename(info_dict, 'infojson')
2189 if not ensure_dir_exists(encodeFilename(infofn)):
2190 return
2191 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2192 self.to_screen('[info] Video metadata is already present')
2193 else:
2194 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2195 try:
2196 write_json_file(self.filter_requested_info(info_dict), infofn)
2197 except (OSError, IOError):
2198 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2199 return
2200 info_dict['__infojson_filename'] = infofn
2201
2202 thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2203 thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2204 for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2205 thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2206 thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2207 files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
2208
2209 # Write internet shortcut files
2210 url_link = webloc_link = desktop_link = False
2211 if self.params.get('writelink', False):
2212 if sys.platform == "darwin": # macOS.
2213 webloc_link = True
2214 elif sys.platform.startswith("linux"):
2215 desktop_link = True
2216 else: # if sys.platform in ['win32', 'cygwin']:
2217 url_link = True
2218 if self.params.get('writeurllink', False):
2219 url_link = True
2220 if self.params.get('writewebloclink', False):
2221 webloc_link = True
2222 if self.params.get('writedesktoplink', False):
2223 desktop_link = True
2224
2225 if url_link or webloc_link or desktop_link:
2226 if 'webpage_url' not in info_dict:
2227 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2228 return
2229 ascii_url = iri_to_uri(info_dict['webpage_url'])
2230
2231 def _write_link_file(extension, template, newline, embed_filename):
2232 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2233 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2234 self.to_screen('[info] Internet shortcut is already present')
2235 else:
2236 try:
2237 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2238 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2239 template_vars = {'url': ascii_url}
2240 if embed_filename:
2241 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2242 linkfile.write(template % template_vars)
2243 except (OSError, IOError):
2244 self.report_error('Cannot write internet shortcut ' + linkfn)
2245 return False
2246 return True
2247
2248 if url_link:
2249 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2250 return
2251 if webloc_link:
2252 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2253 return
2254 if desktop_link:
2255 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2256 return
2257
2258 # Download
2259 must_record_download_archive = False
2260 if not skip_dl:
2261 try:
2262
2263 def existing_file(*filepaths):
2264 ext = info_dict.get('ext')
2265 final_ext = self.params.get('final_ext', ext)
2266 existing_files = []
2267 for file in orderedSet(filepaths):
2268 if final_ext != ext:
2269 converted = replace_extension(file, final_ext, ext)
2270 if os.path.exists(encodeFilename(converted)):
2271 existing_files.append(converted)
2272 if os.path.exists(encodeFilename(file)):
2273 existing_files.append(file)
2274
2275 if not existing_files or self.params.get('overwrites', False):
2276 for file in orderedSet(existing_files):
2277 self.report_file_delete(file)
2278 os.remove(encodeFilename(file))
2279 return None
2280
2281 self.report_file_already_downloaded(existing_files[0])
2282 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2283 return existing_files[0]
2284
2285 success = True
2286 if info_dict.get('requested_formats') is not None:
2287 downloaded = []
2288 merger = FFmpegMergerPP(self)
2289 if not merger.available:
2290 postprocessors = []
2291 self.report_warning('You have requested multiple '
2292 'formats but ffmpeg is not installed.'
2293 ' The formats won\'t be merged.')
2294 else:
2295 postprocessors = [merger]
2296
2297 def compatible_formats(formats):
2298 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2299 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2300 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2301 if len(video_formats) > 2 or len(audio_formats) > 2:
2302 return False
2303
2304 # Check extension
2305 exts = set(format.get('ext') for format in formats)
2306 COMPATIBLE_EXTS = (
2307 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2308 set(('webm',)),
2309 )
2310 for ext_sets in COMPATIBLE_EXTS:
2311 if ext_sets.issuperset(exts):
2312 return True
2313 # TODO: Check acodec/vcodec
2314 return False
2315
2316 requested_formats = info_dict['requested_formats']
2317 old_ext = info_dict['ext']
2318 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2319 info_dict['ext'] = 'mkv'
2320 self.report_warning(
2321 'Requested formats are incompatible for merge and will be merged into mkv.')
2322
2323 def correct_ext(filename):
2324 filename_real_ext = os.path.splitext(filename)[1][1:]
2325 filename_wo_ext = (
2326 os.path.splitext(filename)[0]
2327 if filename_real_ext == old_ext
2328 else filename)
2329 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2330
2331 # Ensure filename always has a correct extension for successful merge
2332 full_filename = correct_ext(full_filename)
2333 temp_filename = correct_ext(temp_filename)
2334 dl_filename = existing_file(full_filename, temp_filename)
2335 if dl_filename is None:
2336 for f in requested_formats:
2337 new_info = dict(info_dict)
2338 new_info.update(f)
2339 fname = prepend_extension(
2340 self.prepare_filename(new_info, 'temp'),
2341 'f%s' % f['format_id'], new_info['ext'])
2342 if not ensure_dir_exists(fname):
2343 return
2344 downloaded.append(fname)
2345 partial_success, real_download = dl(fname, new_info)
2346 success = success and partial_success
2347 info_dict['__postprocessors'] = postprocessors
2348 info_dict['__files_to_merge'] = downloaded
2349 # Even if there were no downloads, it is being merged only now
2350 info_dict['__real_download'] = True
2351 else:
2352 # Just a single file
2353 dl_filename = existing_file(full_filename, temp_filename)
2354 if dl_filename is None:
2355 success, real_download = dl(temp_filename, info_dict)
2356 info_dict['__real_download'] = real_download
2357
2358 dl_filename = dl_filename or temp_filename
2359 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2360
2361 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2362 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2363 return
2364 except (OSError, IOError) as err:
2365 raise UnavailableVideoError(err)
2366 except (ContentTooShortError, ) as err:
2367 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2368 return
2369
2370 if success and full_filename != '-':
2371 # Fixup content
2372 fixup_policy = self.params.get('fixup')
2373 if fixup_policy is None:
2374 fixup_policy = 'detect_or_warn'
2375
2376 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2377
2378 stretched_ratio = info_dict.get('stretched_ratio')
2379 if stretched_ratio is not None and stretched_ratio != 1:
2380 if fixup_policy == 'warn':
2381 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2382 info_dict['id'], stretched_ratio))
2383 elif fixup_policy == 'detect_or_warn':
2384 stretched_pp = FFmpegFixupStretchedPP(self)
2385 if stretched_pp.available:
2386 info_dict['__postprocessors'].append(stretched_pp)
2387 else:
2388 self.report_warning(
2389 '%s: Non-uniform pixel ratio (%s). %s'
2390 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2391 else:
2392 assert fixup_policy in ('ignore', 'never')
2393
2394 if (info_dict.get('requested_formats') is None
2395 and info_dict.get('container') == 'm4a_dash'
2396 and info_dict.get('ext') == 'm4a'):
2397 if fixup_policy == 'warn':
2398 self.report_warning(
2399 '%s: writing DASH m4a. '
2400 'Only some players support this container.'
2401 % info_dict['id'])
2402 elif fixup_policy == 'detect_or_warn':
2403 fixup_pp = FFmpegFixupM4aPP(self)
2404 if fixup_pp.available:
2405 info_dict['__postprocessors'].append(fixup_pp)
2406 else:
2407 self.report_warning(
2408 '%s: writing DASH m4a. '
2409 'Only some players support this container. %s'
2410 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2411 else:
2412 assert fixup_policy in ('ignore', 'never')
2413
2414 if (info_dict.get('protocol') == 'm3u8_native'
2415 or info_dict.get('protocol') == 'm3u8'
2416 and self.params.get('hls_prefer_native')):
2417 if fixup_policy == 'warn':
2418 self.report_warning('%s: malformed AAC bitstream detected.' % (
2419 info_dict['id']))
2420 elif fixup_policy == 'detect_or_warn':
2421 fixup_pp = FFmpegFixupM3u8PP(self)
2422 if fixup_pp.available:
2423 info_dict['__postprocessors'].append(fixup_pp)
2424 else:
2425 self.report_warning(
2426 '%s: malformed AAC bitstream detected. %s'
2427 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2428 else:
2429 assert fixup_policy in ('ignore', 'never')
2430
2431 try:
2432 self.post_process(dl_filename, info_dict, files_to_move)
2433 except PostProcessingError as err:
2434 self.report_error('Postprocessing: %s' % str(err))
2435 return
2436 try:
2437 for ph in self._post_hooks:
2438 ph(full_filename)
2439 except Exception as err:
2440 self.report_error('post hooks: %s' % str(err))
2441 return
2442 must_record_download_archive = True
2443
2444 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2445 self.record_download_archive(info_dict)
2446 max_downloads = self.params.get('max_downloads')
2447 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2448 raise MaxDownloadsReached()
2449
2450 def download(self, url_list):
2451 """Download a given list of URLs."""
2452 outtmpl = self.outtmpl_dict['default']
2453 if (len(url_list) > 1
2454 and outtmpl != '-'
2455 and '%' not in outtmpl
2456 and self.params.get('max_downloads') != 1):
2457 raise SameFileError(outtmpl)
2458
2459 for url in url_list:
2460 try:
2461 # It also downloads the videos
2462 res = self.extract_info(
2463 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2464 except UnavailableVideoError:
2465 self.report_error('unable to download video')
2466 except MaxDownloadsReached:
2467 self.to_screen('[info] Maximum number of downloaded files reached')
2468 raise
2469 except ExistingVideoReached:
2470 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2471 raise
2472 except RejectedVideoReached:
2473 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2474 raise
2475 else:
2476 if self.params.get('dump_single_json', False):
2477 self.to_stdout(json.dumps(res))
2478
2479 return self._download_retcode
2480
2481 def download_with_info_file(self, info_filename):
2482 with contextlib.closing(fileinput.FileInput(
2483 [info_filename], mode='r',
2484 openhook=fileinput.hook_encoded('utf-8'))) as f:
2485 # FileInput doesn't have a read method, we can't call json.load
2486 info = self.filter_requested_info(json.loads('\n'.join(f)))
2487 try:
2488 self.process_ie_result(info, download=True)
2489 except DownloadError:
2490 webpage_url = info.get('webpage_url')
2491 if webpage_url is not None:
2492 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2493 return self.download([webpage_url])
2494 else:
2495 raise
2496 return self._download_retcode
2497
2498 @staticmethod
2499 def filter_requested_info(info_dict):
2500 fields_to_remove = ('requested_formats', 'requested_subtitles')
2501 return dict(
2502 (k, v) for k, v in info_dict.items()
2503 if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
2504
2505 def run_pp(self, pp, infodict, files_to_move={}):
2506 files_to_delete = []
2507 files_to_delete, infodict = pp.run(infodict)
2508 if not files_to_delete:
2509 return files_to_move, infodict
2510
2511 if self.params.get('keepvideo', False):
2512 for f in files_to_delete:
2513 files_to_move.setdefault(f, '')
2514 else:
2515 for old_filename in set(files_to_delete):
2516 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2517 try:
2518 os.remove(encodeFilename(old_filename))
2519 except (IOError, OSError):
2520 self.report_warning('Unable to remove downloaded original file')
2521 if old_filename in files_to_move:
2522 del files_to_move[old_filename]
2523 return files_to_move, infodict
2524
2525 def pre_process(self, ie_info):
2526 info = dict(ie_info)
2527 for pp in self._pps['beforedl']:
2528 info = self.run_pp(pp, info)[1]
2529 return info
2530
2531 def post_process(self, filename, ie_info, files_to_move={}):
2532 """Run all the postprocessors on the given file."""
2533 info = dict(ie_info)
2534 info['filepath'] = filename
2535 info['__files_to_move'] = {}
2536
2537 for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2538 files_to_move, info = self.run_pp(pp, info, files_to_move)
2539 info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
2540 for pp in self._pps['aftermove']:
2541 info = self.run_pp(pp, info, {})[1]
2542
2543 def _make_archive_id(self, info_dict):
2544 video_id = info_dict.get('id')
2545 if not video_id:
2546 return
2547 # Future-proof against any change in case
2548 # and backwards compatibility with prior versions
2549 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2550 if extractor is None:
2551 url = str_or_none(info_dict.get('url'))
2552 if not url:
2553 return
2554 # Try to find matching extractor for the URL and take its ie_key
2555 for ie in self._ies:
2556 if ie.suitable(url):
2557 extractor = ie.ie_key()
2558 break
2559 else:
2560 return
2561 return '%s %s' % (extractor.lower(), video_id)
2562
2563 def in_download_archive(self, info_dict):
2564 fn = self.params.get('download_archive')
2565 if fn is None:
2566 return False
2567
2568 vid_id = self._make_archive_id(info_dict)
2569 if not vid_id:
2570 return False # Incomplete video information
2571
2572 return vid_id in self.archive
2573
2574 def record_download_archive(self, info_dict):
2575 fn = self.params.get('download_archive')
2576 if fn is None:
2577 return
2578 vid_id = self._make_archive_id(info_dict)
2579 assert vid_id
2580 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2581 archive_file.write(vid_id + '\n')
2582 self.archive.add(vid_id)
2583
2584 @staticmethod
2585 def format_resolution(format, default='unknown'):
2586 if format.get('vcodec') == 'none':
2587 return 'audio only'
2588 if format.get('resolution') is not None:
2589 return format['resolution']
2590 if format.get('height') is not None:
2591 if format.get('width') is not None:
2592 res = '%sx%s' % (format['width'], format['height'])
2593 else:
2594 res = '%sp' % format['height']
2595 elif format.get('width') is not None:
2596 res = '%dx?' % format['width']
2597 else:
2598 res = default
2599 return res
2600
2601 def _format_note(self, fdict):
2602 res = ''
2603 if fdict.get('ext') in ['f4f', 'f4m']:
2604 res += '(unsupported) '
2605 if fdict.get('language'):
2606 if res:
2607 res += ' '
2608 res += '[%s] ' % fdict['language']
2609 if fdict.get('format_note') is not None:
2610 res += fdict['format_note'] + ' '
2611 if fdict.get('tbr') is not None:
2612 res += '%4dk ' % fdict['tbr']
2613 if fdict.get('container') is not None:
2614 if res:
2615 res += ', '
2616 res += '%s container' % fdict['container']
2617 if (fdict.get('vcodec') is not None
2618 and fdict.get('vcodec') != 'none'):
2619 if res:
2620 res += ', '
2621 res += fdict['vcodec']
2622 if fdict.get('vbr') is not None:
2623 res += '@'
2624 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2625 res += 'video@'
2626 if fdict.get('vbr') is not None:
2627 res += '%4dk' % fdict['vbr']
2628 if fdict.get('fps') is not None:
2629 if res:
2630 res += ', '
2631 res += '%sfps' % fdict['fps']
2632 if fdict.get('acodec') is not None:
2633 if res:
2634 res += ', '
2635 if fdict['acodec'] == 'none':
2636 res += 'video only'
2637 else:
2638 res += '%-5s' % fdict['acodec']
2639 elif fdict.get('abr') is not None:
2640 if res:
2641 res += ', '
2642 res += 'audio'
2643 if fdict.get('abr') is not None:
2644 res += '@%3dk' % fdict['abr']
2645 if fdict.get('asr') is not None:
2646 res += ' (%5dHz)' % fdict['asr']
2647 if fdict.get('filesize') is not None:
2648 if res:
2649 res += ', '
2650 res += format_bytes(fdict['filesize'])
2651 elif fdict.get('filesize_approx') is not None:
2652 if res:
2653 res += ', '
2654 res += '~' + format_bytes(fdict['filesize_approx'])
2655 return res
2656
2657 def _format_note_table(self, f):
2658 def join_fields(*vargs):
2659 return ', '.join((val for val in vargs if val != ''))
2660
2661 return join_fields(
2662 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2663 format_field(f, 'language', '[%s]'),
2664 format_field(f, 'format_note'),
2665 format_field(f, 'container', ignore=(None, f.get('ext'))),
2666 format_field(f, 'asr', '%5dHz'))
2667
2668 def list_formats(self, info_dict):
2669 formats = info_dict.get('formats', [info_dict])
2670 new_format = self.params.get('listformats_table', False)
2671 if new_format:
2672 table = [
2673 [
2674 format_field(f, 'format_id'),
2675 format_field(f, 'ext'),
2676 self.format_resolution(f),
2677 format_field(f, 'fps', '%d'),
2678 '|',
2679 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2680 format_field(f, 'tbr', '%4dk'),
2681 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2682 '|',
2683 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2684 format_field(f, 'vbr', '%4dk'),
2685 format_field(f, 'acodec', default='unknown').replace('none', ''),
2686 format_field(f, 'abr', '%3dk'),
2687 format_field(f, 'asr', '%5dHz'),
2688 self._format_note_table(f)]
2689 for f in formats
2690 if f.get('preference') is None or f['preference'] >= -1000]
2691 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2692 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2693 else:
2694 table = [
2695 [
2696 format_field(f, 'format_id'),
2697 format_field(f, 'ext'),
2698 self.format_resolution(f),
2699 self._format_note(f)]
2700 for f in formats
2701 if f.get('preference') is None or f['preference'] >= -1000]
2702 header_line = ['format code', 'extension', 'resolution', 'note']
2703
2704 # if len(formats) > 1:
2705 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2706 self.to_screen(
2707 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2708 header_line,
2709 table,
2710 delim=new_format,
2711 extraGap=(0 if new_format else 1),
2712 hideEmpty=new_format)))
2713
2714 def list_thumbnails(self, info_dict):
2715 thumbnails = info_dict.get('thumbnails')
2716 if not thumbnails:
2717 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2718 return
2719
2720 self.to_screen(
2721 '[info] Thumbnails for %s:' % info_dict['id'])
2722 self.to_screen(render_table(
2723 ['ID', 'width', 'height', 'URL'],
2724 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2725
2726 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2727 if not subtitles:
2728 self.to_screen('%s has no %s' % (video_id, name))
2729 return
2730 self.to_screen(
2731 'Available %s for %s:' % (name, video_id))
2732 self.to_screen(render_table(
2733 ['Language', 'formats'],
2734 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2735 for lang, formats in subtitles.items()]))
2736
2737 def urlopen(self, req):
2738 """ Start an HTTP download """
2739 if isinstance(req, compat_basestring):
2740 req = sanitized_Request(req)
2741 return self._opener.open(req, timeout=self._socket_timeout)
2742
    def print_debug_header(self):
        """Write verbose debug information (versions, encodings, proxy map) to
        the debug output. No-op unless the 'verbose' param is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without .encoding
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        # encoding=None lets write_string pick a safe output encoding itself
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] yt-dlp version %s\n' % __version__)
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        # Best-effort: report the git commit when running from a source checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # Python 2 only: clear the current exception state
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Include the PyPy version, which python_version() does not report
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # External helper binaries (ffmpeg/ffprobe, rtmpdump, phantomjs, ...)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective per-scheme proxies from the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE(review): everything below is unreachable (dead code after
            # the `return` above); the upstream update check appears to have
            # been deliberately disabled this way
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2822
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Configures the socket timeout, cookie jar, proxy handling and the
        custom handler chain, then stores the opener on ``self._opener``.
        """
        timeout_val = self.params.get('socket_timeout')
        # 600s default when no 'socket_timeout' param is given
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory jar only; nothing is persisted
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Load existing cookies only if the file is readable
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # '' explicitly disables all proxies (overrides the environment)
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2875
2876 def encode(self, s):
2877 if isinstance(s, bytes):
2878 return s # Already encoded
2879
2880 try:
2881 return s.encode(self.get_encoding())
2882 except UnicodeEncodeError as err:
2883 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2884 raise
2885
2886 def get_encoding(self):
2887 encoding = self.params.get('encoding')
2888 if encoding is None:
2889 encoding = preferredencoding()
2890 return encoding
2891
2892 def _write_thumbnails(self, info_dict, filename): # return the extensions
2893 if self.params.get('writethumbnail', False):
2894 thumbnails = info_dict.get('thumbnails')
2895 if thumbnails:
2896 thumbnails = [thumbnails[-1]]
2897 elif self.params.get('write_all_thumbnails', False):
2898 thumbnails = info_dict.get('thumbnails') or []
2899 else:
2900 thumbnails = []
2901
2902 ret = []
2903 for t in thumbnails:
2904 thumb_ext = determine_ext(t['url'], 'jpg')
2905 suffix = '%s.' % t['id'] if len(thumbnails) > 1 else ''
2906 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2907 t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
2908
2909 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2910 ret.append(suffix + thumb_ext)
2911 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2912 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2913 else:
2914 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2915 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2916 try:
2917 uf = self.urlopen(t['url'])
2918 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2919 shutil.copyfileobj(uf, thumbf)
2920 ret.append(suffix + thumb_ext)
2921 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2922 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2923 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2924 self.report_warning('Unable to download thumbnail "%s": %s' %
2925 (t['url'], error_to_compat_str(err)))
2926 return ret