]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[fragment] Ensure the file is closed on error
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import socket
23import sys
24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474 29from string import ascii_letters
e5813e53 30from zipimport import zipimporter
961ea474 31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
dca08720 34 compat_cookiejar,
003c69a8 35 compat_get_terminal_size,
ce02ed60 36 compat_http_client,
4f026faf 37 compat_kwargs,
d0d9ade4 38 compat_numeric_types,
e9c0cdd3 39 compat_os_name,
ce02ed60 40 compat_str,
67134eab 41 compat_tokenize_tokenize,
ce02ed60
PH
42 compat_urllib_error,
43 compat_urllib_request,
8b172c2e 44 compat_urllib_request_DataHandler,
8c25f81b
PH
45)
46from .utils import (
eedb7ba5
S
47 age_restricted,
48 args_to_str,
ce02ed60
PH
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
acd69589 52 DEFAULT_OUTTMPL,
de6000d9 53 OUTTMPL_TYPES,
ce02ed60 54 determine_ext,
b5559424 55 determine_protocol,
732044af 56 DOT_DESKTOP_LINK_TEMPLATE,
57 DOT_URL_LINK_TEMPLATE,
58 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 59 DownloadError,
c0384f22 60 encode_compat_str,
ce02ed60 61 encodeFilename,
9b9c5355 62 error_to_compat_str,
498f5606 63 EntryNotInPlaylist,
8b0d7497 64 ExistingVideoReached,
590bc6f6 65 expand_path,
ce02ed60 66 ExtractorError,
e29663c6 67 float_or_none,
02dbf93f 68 format_bytes,
76d321f6 69 format_field,
143db31d 70 FORMAT_RE,
525ef922 71 formatSeconds,
773f291d 72 GeoRestrictedError,
c9969434 73 int_or_none,
732044af 74 iri_to_uri,
773f291d 75 ISO3166Utils,
ce02ed60 76 locked_file,
0202b52a 77 make_dir,
dca08720 78 make_HTTPS_handler,
ce02ed60 79 MaxDownloadsReached,
cd6fc19e 80 orderedSet,
b7ab0590 81 PagedList,
083c9df9 82 parse_filesize,
91410c9b 83 PerRequestProxyHandler,
dca08720 84 platform_name,
eedb7ba5 85 PostProcessingError,
ce02ed60 86 preferredencoding,
eedb7ba5 87 prepend_extension,
51fb4995 88 register_socks_protocols,
cfb56d1a 89 render_table,
eedb7ba5 90 replace_extension,
8b0d7497 91 RejectedVideoReached,
ce02ed60
PH
92 SameFileError,
93 sanitize_filename,
1bb5c511 94 sanitize_path,
dcf77cf1 95 sanitize_url,
67dda517 96 sanitized_Request,
e5660ee6 97 std_headers,
1211bb6d 98 str_or_none,
e29663c6 99 strftime_or_none,
ce02ed60 100 subtitles_filename,
732044af 101 to_high_limit_path,
a439a3a4 102 traverse_dict,
ce02ed60 103 UnavailableVideoError,
29eb5174 104 url_basename,
58b1f00d 105 version_tuple,
ce02ed60
PH
106 write_json_file,
107 write_string,
1bab3437 108 YoutubeDLCookieJar,
6a3f4c3f 109 YoutubeDLCookieProcessor,
dca08720 110 YoutubeDLHandler,
fca6dba8 111 YoutubeDLRedirectHandler,
f5b1bca9 112 process_communicate_or_kill,
ce02ed60 113)
a0e07d31 114from .cache import Cache
52a8a1e1 115from .extractor import (
116 gen_extractor_classes,
117 get_info_extractor,
118 _LAZY_LOADER,
119 _PLUGIN_CLASSES
120)
4c54b89e 121from .extractor.openload import PhantomJSwrapper
52a8a1e1 122from .downloader import (
123 get_suitable_downloader,
124 shorten_protocol_name
125)
4c83c967 126from .downloader.rtmp import rtmpdump_version
4f026faf 127from .postprocessor import (
f17f8651 128 FFmpegFixupM3u8PP,
62cd676c 129 FFmpegFixupM4aPP,
6271f1ca 130 FFmpegFixupStretchedPP,
4f026faf
PH
131 FFmpegMergerPP,
132 FFmpegPostProcessor,
0202b52a 133 # FFmpegSubtitlesConvertorPP,
4f026faf 134 get_postprocessor,
0202b52a 135 MoveFilesAfterDownloadPP,
4f026faf 136)
dca08720 137from .version import __version__
8222d8de 138
e9c0cdd3
YCH
139if compat_os_name == 'nt':
140 import ctypes
141
2459b6e1 142
8222d8de
JMF
143class YoutubeDL(object):
144 """YoutubeDL class.
145
146 YoutubeDL objects are the ones responsible for downloading the
147 actual video file and writing it to disk if the user has requested
148 it, among some other tasks. In most cases there should be one per
149 program. As, given a video URL, the downloader doesn't know how to
150 extract all the needed information, task that InfoExtractors do, it
151 has to pass the URL to one of them.
152
153 For this, YoutubeDL objects have a method that allows
154 InfoExtractors to be registered in a given order. When it is passed
155 a URL, the YoutubeDL object hands it to the first InfoExtractor it
156 finds that reports being able to handle it. The InfoExtractor extracts
157 all the information about the video or videos the URL refers to, and
158 YoutubeDL processes the extracted information, possibly using a File
159 Downloader to download the video.
160
161 YoutubeDL objects accept a lot of parameters. In order not to saturate
162 the object constructor with arguments, it receives a dictionary of
163 options instead. These options are available through the params
164 attribute for the InfoExtractors to use. The YoutubeDL also
165 registers itself as the downloader in charge for the InfoExtractors
166 that are added to it, so this is a "mutual registration".
167
168 Available options:
169
170 username: Username for authentication purposes.
171 password: Password for authentication purposes.
180940e0 172 videopassword: Password for accessing a video.
1da50aa3
S
173 ap_mso: Adobe Pass multiple-system operator identifier.
174 ap_username: Multiple-system operator account username.
175 ap_password: Multiple-system operator account password.
8222d8de
JMF
176 usenetrc: Use netrc for authentication instead.
177 verbose: Print additional info to stdout.
178 quiet: Do not print messages to stdout.
ad8915b7 179 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
180 forceurl: Force printing final URL.
181 forcetitle: Force printing title.
182 forceid: Force printing ID.
183 forcethumbnail: Force printing thumbnail URL.
184 forcedescription: Force printing description.
185 forcefilename: Force printing final filename.
525ef922 186 forceduration: Force printing duration.
8694c600 187 forcejson: Force printing info_dict as JSON.
63e0be34
PH
188 dump_single_json: Force printing the info_dict of the whole playlist
189 (or video) as a single JSON line.
c25228e5 190 force_write_download_archive: Force writing download archive regardless
191 of 'skip_download' or 'simulate'.
8222d8de 192 simulate: Do not download the video files.
eb8a4433 193 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 194 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 195 ignore_no_formats_error: Ignore "No video formats" error. Useful for
196 extracting metadata even if the video is not actually
197 available for download (experimental)
c25228e5 198 format_sort: How to sort the video formats. see "Sorting Formats"
199 for more details.
200 format_sort_force: Force the given format_sort. see "Sorting Formats"
201 for more details.
202 allow_multiple_video_streams: Allow multiple video streams to be merged
203 into a single file
204 allow_multiple_audio_streams: Allow multiple audio streams to be merged
205 into a single file
4524baf0 206 paths: Dictionary of output paths. The allowed keys are 'home'
207 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 208 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 209 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
210 A string a also accepted for backward compatibility
a820dc72
RA
211 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
212 restrictfilenames: Do not allow "&" and spaces in file names
213 trim_file_name: Limit length of filename (extension excluded)
4524baf0 214 windowsfilenames: Force the filenames to be windows compatible
a820dc72 215 ignoreerrors: Do not stop on download errors
7a5c1cfe 216 (Default True when running yt-dlp,
a820dc72 217 but False when directly accessing YoutubeDL class)
26e2805c 218 skip_playlist_after_errors: Number of allowed failures until the rest of
219 the playlist is skipped
d22dec74 220 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 221 overwrites: Overwrite all video and metadata files if True,
222 overwrite only non-video files if None
223 and don't overwrite any file if False
8222d8de
JMF
224 playliststart: Playlist item to start at.
225 playlistend: Playlist item to end at.
c14e88f0 226 playlist_items: Specific indices of playlist to download.
ff815fe6 227 playlistreverse: Download playlist items in reverse order.
75822ca7 228 playlistrandom: Download playlist items in random order.
8222d8de
JMF
229 matchtitle: Download only matching titles.
230 rejecttitle: Reject downloads for matching titles.
8bf9319e 231 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
232 logtostderr: Log messages to stderr instead of stdout.
233 writedescription: Write the video description to a .description file
234 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 235 clean_infojson: Remove private fields from the infojson
06167fbb 236 writecomments: Extract video comments. This will not be written to disk
237 unless writeinfojson is also given
1fb07d10 238 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 239 writethumbnail: Write the thumbnail image to a file
c25228e5 240 allow_playlist_files: Whether to write playlists' description, infojson etc
241 also to disk when using the 'write*' options
ec82d85a 242 write_all_thumbnails: Write all thumbnail formats to files
732044af 243 writelink: Write an internet shortcut file, depending on the
244 current platform (.url/.webloc/.desktop)
245 writeurllink: Write a Windows internet shortcut file (.url)
246 writewebloclink: Write a macOS internet shortcut file (.webloc)
247 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 248 writesubtitles: Write the video subtitles to a file
741dd8ea 249 writeautomaticsub: Write the automatically generated subtitles to a file
c32b0aab 250 allsubtitles: Deprecated - Use subtitlelangs = ['all']
251 Downloads all the subtitles of the video
0b7f3118 252 (requires writesubtitles or writeautomaticsub)
8222d8de 253 listsubtitles: Lists all available subtitles for the video
a504ced0 254 subtitlesformat: The format code for subtitles
c32b0aab 255 subtitleslangs: List of languages of the subtitles to download (can be regex).
256 The list may contain "all" to refer to all the available
257 subtitles. The language can be prefixed with a "-" to
258 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
259 keepvideo: Keep the video file after post-processing
260 daterange: A DateRange object, download only if the upload_date is in the range.
261 skip_download: Skip the actual download of the video file
c35f9e72 262 cachedir: Location of the cache files in the filesystem.
a0e07d31 263 False to disable filesystem cache.
47192f92 264 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
265 age_limit: An integer representing the user's age in years.
266 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
267 min_views: An integer representing the minimum view count the video
268 must have in order to not be skipped.
269 Videos without view count information are always
270 downloaded. None for no limit.
271 max_views: An integer representing the maximum view count.
272 Videos that are more popular than that are not
273 downloaded.
274 Videos without view count information are always
275 downloaded. None for no limit.
276 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
277 Videos already present in the file are not downloaded
278 again.
8a51f564 279 break_on_existing: Stop the download process after attempting to download a
280 file that is in the archive.
281 break_on_reject: Stop the download process when encountering a video that
282 has been filtered out.
283 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 284 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
285 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
286 At the moment, this is only supported by YouTube.
a1ee09e8 287 proxy: URL of the proxy server to use
38cce791 288 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 289 on geo-restricted sites.
e344693b 290 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
291 bidi_workaround: Work around buggy terminals without bidirectional text
292 support, using fribidi
a0ddb8a2 293 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 294 include_ads: Download ads as well
04b4d394
PH
295 default_search: Prepend this string if an input url is not valid.
296 'auto' for elaborate guessing
62fec3b2 297 encoding: Use this encoding instead of the system-specified.
e8ee972c 298 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
299 Pass in 'in_playlist' to only show this behavior for
300 playlist items.
4f026faf 301 postprocessors: A list of dictionaries, each with an entry
71b640cc 302 * key: The name of the postprocessor. See
7a5c1cfe 303 yt_dlp/postprocessor/__init__.py for a list.
56d868db 304 * when: When to run the postprocessor. Can be one of
305 pre_process|before_dl|post_process|after_move.
306 Assumed to be 'post_process' if not given
ab8e5e51
AM
307 post_hooks: A list of functions that get called as the final step
308 for each video file, after all postprocessors have been
309 called. The filename will be passed as the only argument.
71b640cc
PH
310 progress_hooks: A list of functions that get called on download
311 progress, with a dictionary with the entries
5cda4eda 312 * status: One of "downloading", "error", or "finished".
ee69b99a 313 Check this first and ignore unknown values.
71b640cc 314
5cda4eda 315 If status is one of "downloading", or "finished", the
ee69b99a
PH
316 following properties may also be present:
317 * filename: The final filename (always present)
5cda4eda 318 * tmpfilename: The filename we're currently writing to
71b640cc
PH
319 * downloaded_bytes: Bytes on disk
320 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
321 * total_bytes_estimate: Guess of the eventual file size,
322 None if unavailable.
323 * elapsed: The number of seconds since download started.
71b640cc
PH
324 * eta: The estimated time in seconds, None if unknown
325 * speed: The download speed in bytes/second, None if
326 unknown
5cda4eda
PH
327 * fragment_index: The counter of the currently
328 downloaded video fragment.
329 * fragment_count: The number of fragments (= individual
330 files that will be merged)
71b640cc
PH
331
332 Progress hooks are guaranteed to be called at least once
333 (with status "finished") if the download is successful.
45598f15 334 merge_output_format: Extension to use when merging formats.
6b591b29 335 final_ext: Expected final extension; used to detect when the file was
336 already downloaded and converted. "merge_output_format" is
337 replaced by this extension when given
6271f1ca
PH
338 fixup: Automatically correct known faults of the file.
339 One of:
340 - "never": do nothing
341 - "warn": only emit a warning
342 - "detect_or_warn": check whether we can do anything
62cd676c 343 about it, warn otherwise (default)
504f20dd 344 source_address: Client-side IP address to bind to.
6ec6cb4e 345 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 346 yt-dlp servers for debugging. (BROKEN)
1cf376f5 347 sleep_interval_requests: Number of seconds to sleep between requests
348 during extraction
7aa589a5
S
349 sleep_interval: Number of seconds to sleep before each download when
350 used alone or a lower bound of a range for randomized
351 sleep before each download (minimum possible number
352 of seconds to sleep) when used along with
353 max_sleep_interval.
354 max_sleep_interval:Upper bound of a range for randomized sleep before each
355 download (maximum possible number of seconds to sleep).
356 Must only be used along with sleep_interval.
357 Actual sleep time will be a random float from range
358 [sleep_interval; max_sleep_interval].
1cf376f5 359 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
360 listformats: Print an overview of available video formats and exit.
361 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
362 match_filter: A function that gets called with the info_dict of
363 every video.
364 If it returns a message, the video is ignored.
365 If it returns None, the video is downloaded.
366 match_filter_func in utils.py is one example for this.
7e5db8c9 367 no_color: Do not emit color codes in output.
0a840f58 368 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 369 HTTP header
0a840f58 370 geo_bypass_country:
773f291d
S
371 Two-letter ISO 3166-2 country code that will be used for
372 explicit geographic restriction bypassing via faking
504f20dd 373 X-Forwarded-For HTTP header
5f95927a
S
374 geo_bypass_ip_block:
375 IP range in CIDR notation that will be used similarly to
504f20dd 376 geo_bypass_country
71b640cc 377
85729c51 378 The following options determine which downloader is picked:
52a8a1e1 379 external_downloader: A dictionary of protocol keys and the executable of the
380 external downloader to use for it. The allowed protocols
381 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
382 Set the value to 'native' to use the native downloader
383 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
384 or {'m3u8': 'ffmpeg'} instead.
385 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
386 if True, otherwise use ffmpeg/avconv if False, otherwise
387 use downloader suggested by extractor if None.
fe7e0c98 388
8222d8de 389 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 390 the downloader (see yt_dlp/downloader/common.py):
8222d8de 391 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 392 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c 393 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
e409895f 394 http_chunk_size.
76b1bd67
JMF
395
396 The following options are used by the post processors:
d4a24f40 397 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 398 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
399 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
400 to the binary or its containing directory.
43820c03 401 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
402 and a list of additional command-line arguments for the
403 postprocessor/executable. The dict can also have "PP+EXE" keys
404 which are used when the given exe is used by the given PP.
405 Use 'default' as the name for arguments to be passed to all PP
e409895f 406
407 The following options are used by the extractors:
62bff2c1 408 extractor_retries: Number of times to retry for known errors
409 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 410 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 411 discontinuities such as ad breaks (default: False)
3600fd59 412 youtube_include_dash_manifest: If True (default), DASH manifests and related
62bff2c1 413 data will be downloaded and processed by extractor.
414 You can reduce network I/O by disabling it if you don't
415 care about DASH. (only for youtube)
e409895f 416 youtube_include_hls_manifest: If True (default), HLS manifests and related
62bff2c1 417 data will be downloaded and processed by extractor.
418 You can reduce network I/O by disabling it if you don't
419 care about HLS. (only for youtube)
8222d8de
JMF
420 """
421
c9969434
S
422 _NUMERIC_FIELDS = set((
423 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
424 'timestamp', 'upload_year', 'upload_month', 'upload_day',
425 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
426 'average_rating', 'comment_count', 'age_limit',
427 'start_time', 'end_time',
428 'chapter_number', 'season_number', 'episode_number',
429 'track_number', 'disc_number', 'release_year',
430 'playlist_index',
431 ))
432
8222d8de
JMF
433 params = None
434 _ies = []
56d868db 435 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
0202b52a 436 __prepare_filename_warned = False
1cf376f5 437 _first_webpage_request = True
8222d8de
JMF
438 _download_retcode = None
439 _num_downloads = None
30a074c2 440 _playlist_level = 0
441 _playlist_urls = set()
8222d8de
JMF
442 _screen_file = None
443
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.  If auto_init is true, the
    debug header is printed and the default info extractors are added.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Per-instance playlist bookkeeping: without this the mutable
    # class-level set would be shared by every YoutubeDL instance
    self._playlist_level = 0
    self._playlist_urls = set()
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old option over unless the new one was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None is treated the same as the option being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be run
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        if self.params.get('verbose'):
            self._write_string('[debug] Loading archive file %r\n' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so the caller's dictionary is left untouched
        pp_def = dict(pp_def_raw)
        pp_class = get_postprocessor(pp_def.pop('key'))
        when = pp_def.pop('when', 'post_process')
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
575
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a YouTube ID starting with a dash.

    Such an argument (a dash followed by exactly ten ID characters) is
    reported together with a suggested command line in which those
    arguments are moved behind a "--" separator.
    """
    dashed = [
        pos for pos, arg in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg)]
    if not dashed:
        return
    remaining = [arg for pos, arg in enumerate(argv) if pos not in dashed]
    moved = [argv[pos] for pos in dashed]
    correct_argv = ['yt-dlp'] + remaining + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
591
8222d8de
JMF
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    ie may be a class or an instance.  An instance is also stored in the
    instance cache under its ie_key() and gets this object set as its
    downloader; a class is only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 598
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    If no instance is cached yet, one is created from the class returned
    by the module-level get_info_extractor(), added to the extractor list
    and then returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
610
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Append every extractor class yielded by gen_extractor_classes() to the
    end of the list, in the order they are yielded
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
617
def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain selected by when.

    when must be one of the keys of self._pps.  The postprocessor gets
    this object set as its downloader.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
622
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph as an additional post hook (appended after existing ones)"""
    registered = self._post_hooks
    registered.append(ph)
626
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 630
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was started.

    Without an _output_channel attribute the message is returned
    unchanged.  Otherwise it is written (UTF-8, newline terminated) to the
    helper's stdin and one line per line of the message is read back from
    the output channel; the final newline is stripped from the result.
    """
    if hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        expected_lines = message.count('\n') + 1
        helper_stdin = self._output_process.stdin
        helper_stdin.write((message + '\n').encode('utf-8'))
        helper_stdin.flush()
        pieces = [
            self._output_channel.readline().decode('utf-8')
            for _ in range(expected_lines)]
        converted = ''.join(pieces)
        return converted[:-len('\n')]
    return message
1c088fa8 643
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout() with the quiet check enabled."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
647
def _write_string(self, s, out=None):
    """Write s to out through write_string(), using the 'encoding' param."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 650
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' param is set, the message goes to its debug() method
    and nothing is printed.  Otherwise the message is printed unless
    check_quiet is true and the 'quiet' param is set.  A newline is
    appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    text = self._bidi_workaround(message)
    line_end = ('\n', '')[skip_eol]
    self._write_string(text + line_end, self._screen_file)
8222d8de
JMF
661
def to_stderr(self, message):
    """Print message to the error file, or hand it to the logger.

    If a 'logger' param is set, the message goes to its error() method;
    otherwise it is written, newline terminated, to self._err_file.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        self.params['logger'].error(message)
        return
    text = self._bidi_workaround(message)
    self._write_string(text + '\n', self._err_file)
8222d8de 671
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message if the 'consoletitle' param is set.

    On 'nt' the title is set through kernel32 when a console window
    exists.  Elsewhere an escape sequence is written to the screen file,
    but only if the TERM environment variable is present.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 682
bdde425c
PH
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Nothing happens unless the 'consoletitle' param is set, 'simulate'
    is not set, the OS is not 'nt' and TERM is in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
691
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Nothing happens unless the 'consoletitle' param is set, 'simulate'
    is not set, the OS is not 'nt' and TERM is in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
700
701 def __enter__(self):
702 self.save_console_title()
703 return self
704
705 def __exit__(self, *args):
706 self.restore_console_title()
f89197d7 707
dca08720 708 if self.params.get('cookiefile') is not None:
1bab3437 709 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 710
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first.  In verbose mode a
    traceback is printed as well: tb if given, otherwise one built from
    the exception being handled or, outside an except block, from the
    current call stack.

    Unless the 'ignoreerrors' param is set, DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                tb = ''
                # Show the wrapped exception first if the active one carries one
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    tb += ''.join(traceback.format_exception(*exc_value.exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
740
def report_warning(self, message):
    '''
    Emit a warning. With a 'logger' param the message is handed to
    logger.warning(); otherwise it is printed to stderr prefixed with
    'WARNING:' (colored when stderr is a tty, colors are allowed and the
    OS is not 'nt'), unless 'no_warnings' is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    prefix = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (prefix, message))
8222d8de
JMF
757
def report_error(self, message, tb=None):
    '''
    Forward the message to trouble() prefixed with 'ERROR:'. The prefix
    is colored in red when stderr is a tty, colors are allowed and the
    OS is not 'nt'.
    '''
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    prefix = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (prefix, message), tb)
769
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that `file_name` was already downloaded completely."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The name could not be encoded for output; use a generic message
        self.to_screen('[download] The file has already been downloaded')
8222d8de 776
def report_file_delete(self, file_name):
    """Tell the user that the existing file `file_name` is about to be deleted."""
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The name could not be encoded for output; use a generic message
        self.to_screen('Deleting existing file')
0c3d0f51 783
def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict of template type -> template.

    A non-dict value is treated as the 'default' template. Every type of
    DEFAULT_OUTTMPL that is missing or empty is filled with its default.
    A warning is reported for each template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for tmpl_type, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_type):
            templates[tmpl_type] = fallback
    for template in templates.values():
        if not isinstance(template, bytes):
            continue
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
797
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    outtmpl   -- the output template string
    info_dict -- the fields available for substitution (it is copied,
                 never modified)
    sanitize  -- optional callable(key, value) applied to the string
                 values that end up in the template dict

    Returns a tuple (outtmpl, template_dict): the adjusted template and a
    defaultdict that yields the 'outtmpl_na_placeholder' param (default
    'NA') for every missing field.
    """
    template_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    # duration_string
    template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-')
        if info_dict.get('duration', None) is not None
        else None)

    # epoch
    template_dict['epoch'] = int(time.time())

    # autonumber
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
        autonumber_size = 5
    template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

    # resolution if not defined
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            template_dict['resolution'] = '%dx?' % template_dict['width']

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
        'autonumber': autonumber_size,
    }
    FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'

    def _pad_field(match):
        # Look the width up per match: a template may use both fields,
        # and each one has its own padding width
        field = match.group('field')
        return '%%(%s)0%dd' % (field, field_size_compat_map[field])

    outtmpl = re.sub(FIELD_SIZE_COMPAT_RE, _pad_field, outtmpl)

    numeric_fields = list(self._NUMERIC_FIELDS)
    if sanitize is None:
        sanitize = lambda k, v: v

    # Internal Formatting = name.key1.key2+number>strf
    # NOTE(review): the layout whitespace inside this pattern presumably
    # relies on FORMAT_RE enabling verbose mode -- confirm in utils
    INTERNAL_FORMAT_RE = FORMAT_RE.format(
        r'''(?P<final_key>
            (?P<fields>\w+(?:\.[-\w]+)*)
            (?:\+(?P<add>-?\d+(?:\.\d+)?))?
            (?:>(?P<strf_format>.+?))?
        )''')
    for mobj in re.finditer(INTERNAL_FORMAT_RE, outtmpl):
        mobj = mobj.groupdict()
        # Object traversal
        fields = mobj['fields'].split('.')
        final_key = mobj['final_key']
        value = traverse_dict(template_dict, fields)
        # Offset the value
        if mobj['add']:
            value = float_or_none(value)
            if value is not None:
                value = value + float(mobj['add'])
        # Datetime formatting
        if mobj['strf_format']:
            value = strftime_or_none(value, mobj['strf_format'])
        if mobj['type'] in 'crs' and value is not None:  # string
            value = sanitize('%{}'.format(mobj['type']) % fields[-1], value)
        else:  # numeric
            numeric_fields.append(final_key)
            value = float_or_none(value)
        if value is not None:
            template_dict[final_key] = value

    # Missing numeric fields used together with integer presentation types
    # in format specification will break the argument substitution since
    # string NA placeholder is returned for missing fields. We will patch
    # output template for missing fields to meet string presentation type.
    for numeric_field in numeric_fields:
        if template_dict.get(numeric_field) is None:
            outtmpl = re.sub(
                FORMAT_RE.format(re.escape(numeric_field)),
                r'%({0})s'.format(numeric_field), outtmpl)

    # Fields whose value is None are dropped so that they fall back to
    # the placeholder; non-numeric values go through sanitize()
    template_dict = collections.defaultdict(lambda: na, (
        (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
        for k, v in template_dict.items() if v is not None))
    return outtmpl, template_dict
888
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Render the output template of type `tmpl_type` for `info_dict`.

    Falls back to the 'default' template when `tmpl_type` has none.
    Returns the resulting filename, or None (after reporting the error)
    when the template processing raises ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, template_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing dot-separated parts (the
            # sub-extension and the extension) and trim only the part
            # before them. An unlimited split would silently drop every
            # segment in between and would duplicate a dot-less name.
            fn_groups = filename.rsplit('.', 2)
            fn_groups[0] = fn_groups[0][:trim_file_name]
            filename = '.'.join(filter(None, fn_groups))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
929
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    info_dict -- the fields used to render the output template
    dir_type  -- template type, also used as the key of the 'paths'
                 param that names the sub-directory ('' = default)
    warn      -- warn (once) when 'paths' is set but cannot be applied

    Returns the sanitized path, '-' when outputting to stdout, or the
    falsy result of _prepare_filename() when no name could be built.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if not filename:
        # _prepare_filename() returns None on an invalid template; bail
        # out before os.path.isabs() below is handed a non-string
        return filename

    if warn and not self.__prepare_filename_warned:
        if not paths:
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout')
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-':
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    path = os.path.join(homepath, subdir, filename)

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
0202b52a 959
def _match_entry(self, info_dict, incomplete):
    """ Returns None if the file should be downloaded

    Otherwise the reason for skipping it is printed and returned.
    `incomplete` marks an info_dict that is not fully extracted yet; the
    'match_filter' param is only consulted for complete ones.

    Raises ExistingVideoReached when the entry is already in the download
    archive and 'break_on_existing' is set, and RejectedVideoReached when
    the entry is rejected and 'break_on_reject' is set.
    """
    # Shared by the message and the check below, so the two cannot drift apart
    archive_suffix = 'has already been recorded in archive'

    def check_filter():
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s %s' % (video_title, archive_suffix)

        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None

    reason = check_filter()
    if reason is not None:
        self.to_screen('[download] ' + reason)
        if reason.endswith(archive_suffix) and self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject', False):
            raise RejectedVideoReached()
    return reason
fe7e0c98 1010
b6c45014
JMF
1011 @staticmethod
1012 def add_extra_info(info_dict, extra_info):
1013 '''Set the keys from extra_info in info dict if they are missing'''
1014 for key, value in extra_info.items():
1015 info_dict.setdefault(key, value)
1016
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable extractor.

    If 'download', also downloads the videos.
    ie_key restricts the extraction to that extractor;
    force_generic_extractor selects 'Generic' when no ie_key is given.
    extra_info is a dict containing the extra values to add to each result
    If 'process' is false, the unprocessed extractor result is returned.

    Returns whatever the extraction yields, or None when the video is
    already in the download archive or no extractor is suitable (the
    latter is reported as an error).
    '''

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        ie_key = candidate.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to get the video id without extracting, so that videos
        # already in the archive can be skipped early
        try:
            if callable(getattr(ie, 'extract_id', None)):
                raw_id = ie.extract_id(url)
            else:
                raw_id = ie._match_id(url)
            temp_id = str_or_none(raw_id)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            return None
        return self.__extract_info(url, ie, download, extra_info, process)

    self.report_error('no suitable InfoExtractor for URL %s' % url)
1056
def __handle_extraction_exceptions(func):
    """Decorator: turn the exceptions raised by an extraction method into reported errors.

    Geo restriction and extractor errors are passed to report_error().
    MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached
    always propagate. Any other exception is reported when 'ignoreerrors'
    is set and re-raised otherwise.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            parts = [e.msg]
            if e.countries:
                parts.append('\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries)))
            parts.append('\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error(''.join(parts))
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
1078
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and, if `process` is set, process its result."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1095
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields that ie_result does not have yet."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1103
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict of values added to the results that lack them;
    it is only read here, never modified.
    Returns the resolved ie_result. None is returned for a playlist whose
    webpage_url is already being processed (nested playlist recursion).

    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so
            # without them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1204
def _ensure_dir_exists(self, path):
    """Delegate to make_dir() for `path`, with report_error as its error callback."""
    result = make_dir(path, self.report_error)
    return result
1207
def __process_playlist(self, ie_result, download):
    """Select and process the entries of the playlist result `ie_result`.

    The entries are narrowed down according to the 'playlist_items',
    'playliststart' and 'playlistend' params, the playlist level files
    (info JSON, description) are written when requested, and each
    remaining entry is processed (and downloaded if `download`).

    Returns ie_result with 'entries' replaced by the processed results,
    or None when a playlist file or its directory could not be written.

    Raises EntryNotInPlaylist when the result has no 'entries' or a
    requested entry is missing.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Put every entry back at its 1-based playlist index and
            # leave None in the gaps. max() takes the whole sequence:
            # unpacking it would fail for a single index.
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1) - 1
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            # Accepts a comma separated list of indexes and 'start-end' ranges
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']

    def make_playlistitems_entries(list_ie_entries):
        num_entries = len(list_ie_entries)
        for i in playlistitems:
            if -num_entries < i <= num_entries:
                yield list_ie_entries[i - 1]
            elif incomplete_entries:
                raise EntryNotInPlaylist()

    if isinstance(ie_entries, list):
        n_all_entries = len(ie_entries)
        if playlistitems:
            entries = list(make_playlistitems_entries(ie_entries))
        else:
            entries = ie_entries[playliststart:playlistend]
        n_entries = len(entries)
        msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
    elif isinstance(ie_entries, PagedList):
        if playlistitems:
            entries = []
            for item in playlistitems:
                entries.extend(ie_entries.getslice(
                    item - 1, item
                ))
        else:
            entries = ie_entries.getslice(
                playliststart, playlistend)
        n_entries = len(entries)
        msg = 'Downloading %d videos' % n_entries
    else:  # iterable
        if playlistitems:
            entries = list(make_playlistitems_entries(list(itertools.islice(
                ie_entries, 0, max(playlistitems)))))
        else:
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
        n_entries = len(entries)
        msg = 'Downloading %d videos' % n_entries

    if any((entry is None for entry in entries)):
        raise EntryNotInPlaylist()
    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
    ie_result['entries'] = entries
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
    failures = 0
    # No limit on failed entries unless 'skip_playlist_after_errors' is set
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry in enumerate(entries, 1):
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': playlistitems[i - 1] if playlistitems else i,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1379
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry under the extraction error handling."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1384
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """ Returns a function to filter the formats according to the filter_spec

    The spec is first tried as a numeric comparison on one of the known
    numeric fields (the value may carry a size suffix), then as a string
    comparison (=, ^=, $=, *=, optionally negated with '!'). A '?' after
    the operator makes the filter return a truthy value for formats that
    lack the field.

    Raises ValueError when the spec matches neither form.
    """

    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, numeric_ops.keys())))
    m = numeric_rex.search(filter_spec)
    if m:
        raw_value = m.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and
            # without an explicit trailing 'B'
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = numeric_ops[m.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, string_ops.keys())))
        m = string_rex.search(filter_spec)
        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)
        comparison_value = m.group('value')
        str_op = string_ops[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not str_op(attr, value)
        else:
            op = str_op

    key = m.group('key')
    none_inclusive = m.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            # The matched '?' text (truthy) or None
            return none_inclusive
        return op(actual_value, comparison_value)
    return _filter
1447
def _default_format_spec(self, info_dict, download=True):
    """Return the format specification used when the user gave none.

    A single pre-merged format is preferred when actually downloading
    and the merger cannot be used, the video is live, or the default
    output template is '-'.
    """

    def can_merge():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    if self.params.get('simulate', False) or not download:
        prefer_best = False
    else:
        prefer_best = (
            not can_merge()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'
    if not self.params.get('allow_multiple_audio_streams', False):
        return 'bestvideo*+bestaudio/best'
    return 'bestvideo+bestaudio/best'
0017d9ad 1468
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    format_spec is tokenized with Python's tokenizer and parsed with the
    following operators:
        ','      several selections; the formats of each one are yielded
        '/'      alternatives; the first one that yields any format wins
        '+'      merge the formats picked by both operands
        '(...)'  grouping
        '[...]'  filter, compiled by self._build_format_filter

    Returns a function that takes a context dict with the keys 'formats'
    (list of format dicts) and 'incomplete_formats' (bool) and returns an
    iterable of the selected format dicts.
    Raises SyntaxError when format_spec cannot be parsed.
    """

    def syntax_error(note, start):
        # start is a (row, column) token position; the caret is placed
        # under the offending column of the expression
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Join the raw token strings up to (excluding) the closing ']'.
        # Falls through (returning None) if the tokens run out first.
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; returns a list of FormatSelector tuples.
        # The inside_* flags tell a nested call which operators end it and
        # must be handed back to the caller via restore_last_token().
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        # A bare filter applies to 'best'
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format
        # dict listing its parts under 'requested_formats'
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # For every stream kind that may appear only once, keep the first
            # format carrying it and drop later ones. A new list is built
            # instead of popping from the list being enumerated: popping in
            # place skipped the element after each removal and could remove
            # two elements for a format having both audio and video.
            # Only formats that are actually kept claim a stream kind.
            stream_taken = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                restricted_kinds = [
                    aud_vid for aud_vid in ('audio', 'video')
                    if not allow_multiple_streams[aud_vid]
                    and fmt_info.get(aud_vid[0] + 'codec') != 'none']
                if any(stream_taken[aud_vid] for aud_vid in restricted_kinds):
                    continue
                for aud_vid in restricted_kinds:
                    stream_taken[aud_vid] = True
                kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a function
        # mapping a context dict to the selected formats
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for fmt in f(ctx):
                        yield fmt
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if formats:
                        for f in formats:
                            yield f
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    # Fold all formats into one, starting from the last one
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback = False
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    # n-th best counts from the end of the list,
                    # n-th worst from its start
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else None)  # b*, w*
                else:
                    format_idx = -1
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    n = len(matches)
                    if -n <= format_idx < n:
                        yield matches[format_idx]
                    elif format_fallback and ctx['incomplete_formats']:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        n = len(formats)
                        if -n <= format_idx < n:
                            yield formats[format_idx]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # Each operand gets its own copy of the context
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the '[...]' filters on a copy so that the caller's
            # context is left untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token, so a
        # nested parser call can return an operator to its caller
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1773
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers dict for the resource described by info_dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers'],
    sets 'Cookie' when self._calc_cookies returns a non-empty value and
    sets 'X-Forwarded-For' from info_dict['__x_forwarded_for_ip'] unless
    that header is already present.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1791
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header self.cookiejar adds for info_dict['url'].

    A throwaway request for the URL is created only to let the cookie jar
    fill in the header; nothing is sent.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
e5660ee6 1796
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, select its formats and process them.

    info_dict is modified in place: field types are sanitized, thumbnails,
    dates, chapter/season/episode titles, subtitles and formats are filled
    in or cleaned up.

    Returns None right after listing when one of the 'list_thumbnails',
    'listsubtitles' or 'listformats' parameters is set. Otherwise the
    formats chosen by the format selector are passed to self.process_info
    (only when download is true) and info_dict, updated with the last
    selected format, is returned.

    Raises ExtractorError when 'id' or 'title' is missing, and - unless the
    'ignore_no_formats_error' parameter is set - when there are no formats
    or the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Convert a non-string value to compat_str, warning about it
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        # Convert non-numeric values of the known numeric fields with
        # int_or_none, warning about each of them
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        # Build the thumbnails list from the single 'thumbnail' field
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Sort ascending, so that the last thumbnail is the preferred one;
        # missing values sort first
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # Last one after sorting
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive the YYYYMMDD date fields from their timestamps when missing
    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                info_dict[date_key] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    # Sanitize subtitle URLs and fill in missing extensions from the URL
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('No video formats found!')
        else:
            self.report_warning('No video formats found!')

    def is_wellformed(f):
        # A format is usable only if it has a non-empty 'url'
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Maps each format_id to the formats sharing it
    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if not format.get('format_id'):
            format['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    # (only after the loop above, which reads it through _calc_headers)
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self.to_screen('[debug] Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('Requested format is not available', expected=True)
        else:
            self.report_warning('Requested format is not available')
    elif download:
        self.to_screen(
            '[info] %s: Downloading format(s) %s'
            % (info_dict['id'], ", ".join([f['format_id'] for f in formats_to_download])))
        if len(formats_to_download) > 1:
            self.to_screen(
                '[info] %s: Downloading video in %s formats'
                % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            # Each format is processed with its own copy of the video info
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict
2049
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language to a list of
    subtitle format dicts (each having an 'ext' key). They are considered
    only when 'writesubtitles' / 'writeautomaticsub' respectively is set;
    for a language present in both, the regular subtitles win.

    Languages are chosen from the 'allsubtitles' and 'subtitleslangs'
    parameters. Entries of 'subtitleslangs' are regular expressions matched
    against the whole language name, 'all' selects everything and a leading
    '-' removes the matching languages selected so far. Without either
    parameter 'en' is used if available, otherwise the first language.
    The format is chosen from the '/'-separated 'subtitlesformat'
    preference list ('best' means the last available format).

    Returns a dict mapping each selected language to its chosen format
    dict, in the order the languages were requested, or None when no
    subtitles are wanted or available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Do not override regular subtitles
            available_subs.setdefault(lang, cap_info)

    # Nothing can be collected above unless one of the two options is set,
    # so this also covers the "no subtitles wanted" case
    if not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list (deduplicated below) rather than a set keeps the order of
        # the result deterministic
        selected = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                selected.extend(all_sub_langs)
                continue
            # startswith() also copes with an empty pattern
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            match_lang = re.compile(lang_re + '$').match
            current_langs = [lang for lang in all_sub_langs if match_lang(lang)]
            if discard:
                selected = [lang for lang in selected if lang not in current_langs]
            else:
                selected.extend(current_langs)
        requested_langs = []
        for lang in selected:
            if lang not in requested_langs:
                requested_langs.append(lang)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list has nothing to pick from either
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [f for f in formats if f['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred formats exists, fall back to the last one
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2111
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout the fields requested through the 'force*' parameters.

    Output order: title, id, url(s), thumbnail, description, filename,
    duration, format, JSON dump. With incomplete set, title, id and format
    are printed only when present, and URLs are not printed at all.
    """

    def forced(option):
        return self.params.get('force%s' % option, False)

    def print_field(field, mandatory):
        if not forced(field):
            return
        # A mandatory field of a complete info_dict is printed
        # unconditionally (raising KeyError if it is absent)
        if (mandatory and not incomplete) or info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    print_field('title', True)
    print_field('id', True)

    if forced('url') and not incomplete:
        requested_formats = info_dict.get('requested_formats')
        sources = [info_dict] if requested_formats is None else requested_formats
        for source in sources:
            # For RTMP URLs, also include the playpath
            self.to_stdout(source['url'] + source.get('play_path', ''))

    print_field('thumbnail', False)
    print_field('description', False)

    if forced('filename') and filename is not None:
        self.to_stdout(filename)

    if forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))

    print_field('format', True)

    if forced('json'):
        self.post_extract(info_dict)
        self.to_stdout(json.dumps(info_dict, default=repr))
d06daf23 2142
8222d8de
JMF
2143 def process_info(self, info_dict):
2144 """Process a single resolved IE result."""
2145
2146 assert info_dict.get('_type', 'video') == 'video'
fd288278 2147
0202b52a 2148 info_dict.setdefault('__postprocessors', [])
2149
fd288278
PH
2150 max_downloads = self.params.get('max_downloads')
2151 if max_downloads is not None:
2152 if self._num_downloads >= int(max_downloads):
2153 raise MaxDownloadsReached()
8222d8de 2154
d06daf23 2155 # TODO: backward compatibility, to be removed
8222d8de 2156 info_dict['fulltitle'] = info_dict['title']
8222d8de 2157
11b85ce6 2158 if 'format' not in info_dict:
8222d8de
JMF
2159 info_dict['format'] = info_dict['ext']
2160
8b0d7497 2161 if self._match_entry(info_dict, incomplete=False) is not None:
8222d8de
JMF
2162 return
2163
277d6ff5 2164 self.post_extract(info_dict)
fd288278 2165 self._num_downloads += 1
8222d8de 2166
56d868db 2167 info_dict, _ = self.pre_process(info_dict)
5bfa4862 2168
dcf64d43 2169 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2170 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2171 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2172 files_to_move = {}
8222d8de
JMF
2173
2174 # Forced printings
0202b52a 2175 self.__forced_printings(info_dict, full_filename, incomplete=False)
8222d8de 2176
8222d8de 2177 if self.params.get('simulate', False):
2d30509f 2178 if self.params.get('force_write_download_archive', False):
2179 self.record_download_archive(info_dict)
2180
2181 # Do nothing else if in simulate mode
8222d8de
JMF
2182 return
2183
de6000d9 2184 if full_filename is None:
8222d8de
JMF
2185 return
2186
e92caff5 2187 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2188 return
e92caff5 2189 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2190 return
2191
2192 if self.params.get('writedescription', False):
de6000d9 2193 descfn = self.prepare_filename(info_dict, 'description')
e92caff5 2194 if not self._ensure_dir_exists(encodeFilename(descfn)):
0202b52a 2195 return
0c3d0f51 2196 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2197 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
2198 elif info_dict.get('description') is None:
2199 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
2200 else:
2201 try:
6febd1c1 2202 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
2203 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2204 descfile.write(info_dict['description'])
7b6fefc9 2205 except (OSError, IOError):
6febd1c1 2206 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2207 return
8222d8de 2208
1fb07d10 2209 if self.params.get('writeannotations', False):
de6000d9 2210 annofn = self.prepare_filename(info_dict, 'annotation')
e92caff5 2211 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2212 return
0c3d0f51 2213 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2214 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2215 elif not info_dict.get('annotations'):
2216 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2217 else:
2218 try:
6febd1c1 2219 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2220 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2221 annofile.write(info_dict['annotations'])
2222 except (KeyError, TypeError):
6febd1c1 2223 self.report_warning('There are no annotations to write.')
7b6fefc9 2224 except (OSError, IOError):
6febd1c1 2225 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2226 return
1fb07d10 2227
9f448fcb 2228 def dl(name, info, subtitle=False):
98b69821 2229 fd = get_suitable_downloader(info, self.params)(self, self.params)
2230 for ph in self._progress_hooks:
2231 fd.add_progress_hook(ph)
2232 if self.params.get('verbose'):
29f7c58a 2233 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
46906886
DA
2234 new_info = dict(info)
2235 if new_info.get('http_headers') is None:
2236 new_info['http_headers'] = self._calc_headers(new_info)
2237 return fd.download(name, new_info, subtitle)
98b69821 2238
c4a91be7 2239 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2240 self.params.get('writeautomaticsub')])
c4a91be7 2241
c84dd8a9 2242 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
2243 # subtitles download errors are already managed as troubles in relevant IE
2244 # that way it will silently go on when used with unsupporting IE
c84dd8a9 2245 subtitles = info_dict['requested_subtitles']
fa57af1e 2246 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
2247 for sub_lang, sub_info in subtitles.items():
2248 sub_format = sub_info['ext']
56d868db 2249 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2250 sub_filename_final = subtitles_filename(
2251 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2252 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2253 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
dcf64d43 2254 sub_info['filepath'] = sub_filename
0202b52a 2255 files_to_move[sub_filename] = sub_filename_final
a504ced0 2256 else:
0c9df79e 2257 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
2258 if sub_info.get('data') is not None:
2259 try:
2260 # Use newline='' to prevent conversion of newline characters
067aa17e 2261 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
2262 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2263 subfile.write(sub_info['data'])
dcf64d43 2264 sub_info['filepath'] = sub_filename
0202b52a 2265 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c
RA
2266 except (OSError, IOError):
2267 self.report_error('Cannot write subtitles file ' + sub_filename)
2268 return
7b6fefc9 2269 else:
5ff1bc0c 2270 try:
dcf64d43 2271 dl(sub_filename, sub_info.copy(), subtitle=True)
2272 sub_info['filepath'] = sub_filename
0202b52a 2273 files_to_move[sub_filename] = sub_filename_final
0c9df79e 2274 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
5ff1bc0c
RA
2275 self.report_warning('Unable to download subtitle for "%s": %s' %
2276 (sub_lang, error_to_compat_str(err)))
2277 continue
8222d8de 2278
8222d8de 2279 if self.params.get('writeinfojson', False):
de6000d9 2280 infofn = self.prepare_filename(info_dict, 'infojson')
e92caff5 2281 if not self._ensure_dir_exists(encodeFilename(infofn)):
0202b52a 2282 return
0c3d0f51 2283 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2284 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2285 else:
66c935fb 2286 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2287 try:
75d43ca0 2288 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
7b6fefc9 2289 except (OSError, IOError):
66c935fb 2290 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2291 return
de6000d9 2292 info_dict['__infojson_filename'] = infofn
8222d8de 2293
56d868db 2294 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2295 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2296 thumb_filename = replace_extension(
2297 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
dcf64d43 2298 files_to_move[thumb_filename_temp] = thumb_filename
8222d8de 2299
732044af 2300 # Write internet shortcut files
2301 url_link = webloc_link = desktop_link = False
2302 if self.params.get('writelink', False):
2303 if sys.platform == "darwin": # macOS.
2304 webloc_link = True
2305 elif sys.platform.startswith("linux"):
2306 desktop_link = True
2307 else: # if sys.platform in ['win32', 'cygwin']:
2308 url_link = True
2309 if self.params.get('writeurllink', False):
2310 url_link = True
2311 if self.params.get('writewebloclink', False):
2312 webloc_link = True
2313 if self.params.get('writedesktoplink', False):
2314 desktop_link = True
2315
2316 if url_link or webloc_link or desktop_link:
2317 if 'webpage_url' not in info_dict:
2318 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2319 return
2320 ascii_url = iri_to_uri(info_dict['webpage_url'])
2321
def _write_link_file(extension, template, newline, embed_filename):
    """Write one internet shortcut file next to the media file.

    Closure over the enclosing scope: uses `self`, `info_dict`,
    `full_filename` and `ascii_url`.

    extension      -- extension of the shortcut file (e.g. 'url')
    template       -- %-style template; receives 'url' and optionally 'filename'
    newline        -- newline convention passed to io.open()
    embed_filename -- whether the template also gets the name without extension

    Returns False if the file could not be written, True otherwise
    (including when an existing shortcut was left untouched).
    """
    linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
    # Keep an existing shortcut only when overwriting is disabled; this is the
    # same guard used for the annotation, subtitle and infojson files.
    if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
        self.to_screen('[info] Internet shortcut is already present')
    else:
        try:
            self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                template_vars = {'url': ascii_url}
                if embed_filename:
                    # Strip '.<extension>' from the end of the shortcut name
                    template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                linkfile.write(template % template_vars)
        except (OSError, IOError):
            self.report_error('Cannot write internet shortcut ' + linkfn)
            return False
    return True
2338
2339 if url_link:
2340 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2341 return
2342 if webloc_link:
2343 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2344 return
2345 if desktop_link:
2346 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2347 return
2348
56d868db 2349 try:
2350 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2351 except PostProcessingError as err:
2352 self.report_error('Preprocessing: %s' % str(err))
2353 return
2354
732044af 2355 must_record_download_archive = False
56d868db 2356 if self.params.get('skip_download', False):
2357 info_dict['filepath'] = temp_filename
2358 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2359 info_dict['__files_to_move'] = files_to_move
2360 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2361 else:
2362 # Download
4340deca 2363 try:
0202b52a 2364
6b591b29 2365 def existing_file(*filepaths):
2366 ext = info_dict.get('ext')
2367 final_ext = self.params.get('final_ext', ext)
2368 existing_files = []
2369 for file in orderedSet(filepaths):
2370 if final_ext != ext:
2371 converted = replace_extension(file, final_ext, ext)
2372 if os.path.exists(encodeFilename(converted)):
2373 existing_files.append(converted)
2374 if os.path.exists(encodeFilename(file)):
2375 existing_files.append(file)
2376
2377 if not existing_files or self.params.get('overwrites', False):
2378 for file in orderedSet(existing_files):
2379 self.report_file_delete(file)
2380 os.remove(encodeFilename(file))
2381 return None
2382
2383 self.report_file_already_downloaded(existing_files[0])
2384 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2385 return existing_files[0]
0202b52a 2386
2387 success = True
4340deca
P
2388 if info_dict.get('requested_formats') is not None:
2389 downloaded = []
d47aeb22 2390 merger = FFmpegMergerPP(self)
63ad4d43 2391 if self.params.get('allow_unplayable_formats'):
2392 self.report_warning(
2393 'You have requested merging of multiple formats '
2394 'while also allowing unplayable formats to be downloaded. '
2395 'The formats won\'t be merged to prevent data corruption.')
2396 elif not merger.available:
2397 self.report_warning(
2398 'You have requested merging of multiple formats but ffmpeg is not installed. '
2399 'The formats won\'t be merged.')
81cd954a
S
2400
2401 def compatible_formats(formats):
d03cfdce 2402 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2403 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2404 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2405 if len(video_formats) > 2 or len(audio_formats) > 2:
2406 return False
2407
81cd954a 2408 # Check extension
d03cfdce 2409 exts = set(format.get('ext') for format in formats)
2410 COMPATIBLE_EXTS = (
2411 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2412 set(('webm',)),
2413 )
2414 for ext_sets in COMPATIBLE_EXTS:
2415 if ext_sets.issuperset(exts):
2416 return True
81cd954a
S
2417 # TODO: Check acodec/vcodec
2418 return False
2419
2420 requested_formats = info_dict['requested_formats']
0202b52a 2421 old_ext = info_dict['ext']
4d971a16 2422 if self.params.get('merge_output_format') is None:
2423 if not compatible_formats(requested_formats):
2424 info_dict['ext'] = 'mkv'
2425 self.report_warning(
2426 'Requested formats are incompatible for merge and will be merged into mkv.')
2427 if (info_dict['ext'] == 'webm'
2428 and self.params.get('writethumbnail', False)
2429 and info_dict.get('thumbnails')):
2430 info_dict['ext'] = 'mkv'
2431 self.report_warning(
2432 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
0202b52a 2433
2434 def correct_ext(filename):
2435 filename_real_ext = os.path.splitext(filename)[1][1:]
2436 filename_wo_ext = (
2437 os.path.splitext(filename)[0]
2438 if filename_real_ext == old_ext
2439 else filename)
2440 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2441
38c6902b 2442 # Ensure filename always has a correct extension for successful merge
0202b52a 2443 full_filename = correct_ext(full_filename)
2444 temp_filename = correct_ext(temp_filename)
2445 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2446 info_dict['__real_download'] = False
0202b52a 2447 if dl_filename is None:
81cd954a 2448 for f in requested_formats:
5b5fbc08
JMF
2449 new_info = dict(info_dict)
2450 new_info.update(f)
c5c9bf0c 2451 fname = prepend_extension(
de6000d9 2452 self.prepare_filename(new_info, 'temp'),
c5c9bf0c 2453 'f%s' % f['format_id'], new_info['ext'])
e92caff5 2454 if not self._ensure_dir_exists(fname):
c5c9bf0c 2455 return
5b5fbc08 2456 downloaded.append(fname)
a9e7f546 2457 partial_success, real_download = dl(fname, new_info)
1ea24129 2458 info_dict['__real_download'] = info_dict['__real_download'] or real_download
5b5fbc08 2459 success = success and partial_success
63ad4d43 2460 if merger.available and not self.params.get('allow_unplayable_formats'):
efabc161 2461 info_dict['__postprocessors'].append(merger)
1ea24129 2462 info_dict['__files_to_merge'] = downloaded
2463 # Even if there were no downloads, it is being merged only now
2464 info_dict['__real_download'] = True
42bb0c59 2465 else:
2466 for file in downloaded:
2467 files_to_move[file] = None
4340deca
P
2468 else:
2469 # Just a single file
0202b52a 2470 dl_filename = existing_file(full_filename, temp_filename)
2471 if dl_filename is None:
2472 success, real_download = dl(temp_filename, info_dict)
2473 info_dict['__real_download'] = real_download
2474
0202b52a 2475 dl_filename = dl_filename or temp_filename
c571435f 2476 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2477
4340deca 2478 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
7960b056 2479 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2480 return
2481 except (OSError, IOError) as err:
2482 raise UnavailableVideoError(err)
2483 except (ContentTooShortError, ) as err:
2484 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2485 return
8222d8de 2486
de6000d9 2487 if success and full_filename != '-':
6271f1ca 2488 # Fixup content
62cd676c
PH
2489 fixup_policy = self.params.get('fixup')
2490 if fixup_policy is None:
2491 fixup_policy = 'detect_or_warn'
2492
e4172ac9 2493 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
d1e4a464 2494
6271f1ca
PH
2495 stretched_ratio = info_dict.get('stretched_ratio')
2496 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
2497 if fixup_policy == 'warn':
2498 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2499 info_dict['id'], stretched_ratio))
2500 elif fixup_policy == 'detect_or_warn':
2501 stretched_pp = FFmpegFixupStretchedPP(self)
2502 if stretched_pp.available:
6271f1ca
PH
2503 info_dict['__postprocessors'].append(stretched_pp)
2504 else:
2505 self.report_warning(
d1e4a464
S
2506 '%s: Non-uniform pixel ratio (%s). %s'
2507 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
6271f1ca 2508 else:
62cd676c
PH
2509 assert fixup_policy in ('ignore', 'never')
2510
3089bc74 2511 if (info_dict.get('requested_formats') is None
6b591b29 2512 and info_dict.get('container') == 'm4a_dash'
2513 and info_dict.get('ext') == 'm4a'):
62cd676c 2514 if fixup_policy == 'warn':
d1e4a464
S
2515 self.report_warning(
2516 '%s: writing DASH m4a. '
2517 'Only some players support this container.'
2518 % info_dict['id'])
62cd676c
PH
2519 elif fixup_policy == 'detect_or_warn':
2520 fixup_pp = FFmpegFixupM4aPP(self)
2521 if fixup_pp.available:
62cd676c
PH
2522 info_dict['__postprocessors'].append(fixup_pp)
2523 else:
2524 self.report_warning(
d1e4a464
S
2525 '%s: writing DASH m4a. '
2526 'Only some players support this container. %s'
2527 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
62cd676c
PH
2528 else:
2529 assert fixup_policy in ('ignore', 'never')
6271f1ca 2530
0a473f2f 2531 if ('protocol' in info_dict
2532 and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
f17f8651 2533 if fixup_policy == 'warn':
a02682fd 2534 self.report_warning('%s: malformed AAC bitstream detected.' % (
f17f8651 2535 info_dict['id']))
2536 elif fixup_policy == 'detect_or_warn':
2537 fixup_pp = FFmpegFixupM3u8PP(self)
2538 if fixup_pp.available:
f17f8651 2539 info_dict['__postprocessors'].append(fixup_pp)
2540 else:
2541 self.report_warning(
a02682fd 2542 '%s: malformed AAC bitstream detected. %s'
d1e4a464 2543 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
f17f8651 2544 else:
2545 assert fixup_policy in ('ignore', 'never')
2546
8222d8de 2547 try:
23c1a667 2548 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2549 except PostProcessingError as err:
2550 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2551 return
ab8e5e51
AM
2552 try:
2553 for ph in self._post_hooks:
23c1a667 2554 ph(info_dict['filepath'])
ab8e5e51
AM
2555 except Exception as err:
2556 self.report_error('post hooks: %s' % str(err))
2557 return
2d30509f 2558 must_record_download_archive = True
2559
2560 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2561 self.record_download_archive(info_dict)
c3e6ffba 2562 max_downloads = self.params.get('max_downloads')
2563 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2564 raise MaxDownloadsReached()
8222d8de
JMF
2565
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written through a
    fixed output template (not '-' and without any '%' field), unless
    'max_downloads' is 1. Returns self._download_retcode.
    """
    outtmpl = self.outtmpl_dict['default']
    several_urls = len(url_list) > 1
    if (several_urls
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            # Reported, then the remaining URLs are still processed
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        if self.params.get('dump_single_json', False):
            self.post_extract(res)
            self.to_stdout(json.dumps(res, default=repr))

    return self._download_retcode
2597
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file as if it had just been extracted.

    The file is read as UTF-8, passed through filter_requested_info()
    (honouring the 'clean_infojson' param) and handed to process_ie_result().
    If that fails with DownloadError/EntryNotInPlaylist and the info has a
    'webpage_url', that URL is downloaded instead; otherwise the error is
    re-raised. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2614
cb202fd2 2615 @staticmethod
75d43ca0 2616 def filter_requested_info(info_dict, actually_filter=True):
2617 if not actually_filter:
394dcd44 2618 info_dict['epoch'] = int(time.time())
75d43ca0 2619 return info_dict
5226731e 2620 exceptions = {
498f5606 2621 'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
5226731e 2622 'keep': ['_type'],
2623 }
2624 keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2625 filter_fn = lambda obj: (
a515a78d 2626 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2627 else obj if not isinstance(obj, dict)
2628 else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
5226731e 2629 return filter_fn(info_dict)
cb202fd2 2630
def run_pp(self, pp, infodict):
    """Run a single postprocessor and deal with the files it wants removed.

    pp.run() returns (files_to_delete, infodict). With the 'keepvideo'
    param the files are registered in '__files_to_move' (mapped to '' if
    not already present) instead of being deleted; otherwise each file is
    removed from disk and dropped from '__files_to_move'.
    Returns the info dict produced by the postprocessor.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for path in obsolete_files:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for path in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % path)
        try:
            os.remove(encodeFilename(path))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # The file must not be moved later, whether or not removal worked
        infodict['__files_to_move'].pop(path, None)
    return infodict
5bfa4862 2652
277d6ff5 2653 @staticmethod
2654 def post_extract(info_dict):
2655 def actual_post_extract(info_dict):
2656 if info_dict.get('_type') in ('playlist', 'multi_video'):
2657 for video_dict in info_dict.get('entries', {}):
b050d210 2658 actual_post_extract(video_dict or {})
277d6ff5 2659 return
2660
2661 if '__post_extractor' not in info_dict:
2662 return
2663 post_extractor = info_dict['__post_extractor']
2664 if post_extractor:
2665 info_dict.update(post_extractor().items())
2666 del info_dict['__post_extractor']
2667 return
2668
b050d210 2669 actual_post_extract(info_dict or {})
277d6ff5 2670
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of ie_info.

    Returns a (info, files_to_move) tuple; '__files_to_move' is taken
    back out of the returned info dict.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        working = self.run_pp(processor, working)
    pending_moves = working.pop('__files_to_move', None)
    return working, pending_moves
5bfa4862 2677
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    result = dict(ie_info)
    result['filepath'] = filename
    result['__files_to_move'] = files_to_move or {}

    # Postprocessors attached to this download run before the global ones
    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        result = self.run_pp(processor, result)

    result = self.run_pp(MoveFilesAfterDownloadPP(self), result)
    del result['__files_to_move']

    for processor in self._pps['after_move']:
        result = self.run_pp(processor, result)
    return result
c1c9a79c 2691
5db07df6 2692 def _make_archive_id(self, info_dict):
e9fef7ee
S
2693 video_id = info_dict.get('id')
2694 if not video_id:
2695 return
5db07df6
PH
2696 # Future-proof against any change in case
2697 # and backwards compatibility with prior versions
e9fef7ee 2698 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2699 if extractor is None:
1211bb6d
S
2700 url = str_or_none(info_dict.get('url'))
2701 if not url:
2702 return
e9fef7ee
S
2703 # Try to find matching extractor for the URL and take its ie_key
2704 for ie in self._ies:
1211bb6d 2705 if ie.suitable(url):
e9fef7ee
S
2706 extractor = ie.ie_key()
2707 break
2708 else:
2709 return
d0757229 2710 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2711
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is recorded in the archive.

    Always False when no 'download_archive' is configured or when no
    archive id can be built for the info.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id:
        return archive_id in self.archive
    return False  # Incomplete video information
c1c9a79c
PH
2722
def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and to self.archive.

    Does nothing when no 'download_archive' is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2732
8c51aa65 2733 @staticmethod
8abeeb94 2734 def format_resolution(format, default='unknown'):
fb04e403
PH
2735 if format.get('vcodec') == 'none':
2736 return 'audio only'
f49d89ee
PH
2737 if format.get('resolution') is not None:
2738 return format['resolution']
35615307
DA
2739 if format.get('width') and format.get('height'):
2740 res = '%dx%d' % (format['width'], format['height'])
2741 elif format.get('height'):
2742 res = '%sp' % format['height']
2743 elif format.get('width'):
388ae76b 2744 res = '%dx?' % format['width']
8c51aa65 2745 else:
8abeeb94 2746 res = default
8c51aa65
JMF
2747 return res
2748
c57f7757
PH
2749 def _format_note(self, fdict):
2750 res = ''
2751 if fdict.get('ext') in ['f4f', 'f4m']:
2752 res += '(unsupported) '
32f90364
PH
2753 if fdict.get('language'):
2754 if res:
2755 res += ' '
9016d76f 2756 res += '[%s] ' % fdict['language']
c57f7757
PH
2757 if fdict.get('format_note') is not None:
2758 res += fdict['format_note'] + ' '
2759 if fdict.get('tbr') is not None:
2760 res += '%4dk ' % fdict['tbr']
2761 if fdict.get('container') is not None:
2762 if res:
2763 res += ', '
2764 res += '%s container' % fdict['container']
3089bc74
S
2765 if (fdict.get('vcodec') is not None
2766 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2767 if res:
2768 res += ', '
2769 res += fdict['vcodec']
91c7271a 2770 if fdict.get('vbr') is not None:
c57f7757
PH
2771 res += '@'
2772 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2773 res += 'video@'
2774 if fdict.get('vbr') is not None:
2775 res += '%4dk' % fdict['vbr']
fbb21cf5 2776 if fdict.get('fps') is not None:
5d583bdf
S
2777 if res:
2778 res += ', '
2779 res += '%sfps' % fdict['fps']
c57f7757
PH
2780 if fdict.get('acodec') is not None:
2781 if res:
2782 res += ', '
2783 if fdict['acodec'] == 'none':
2784 res += 'video only'
2785 else:
2786 res += '%-5s' % fdict['acodec']
2787 elif fdict.get('abr') is not None:
2788 if res:
2789 res += ', '
2790 res += 'audio'
2791 if fdict.get('abr') is not None:
2792 res += '@%3dk' % fdict['abr']
2793 if fdict.get('asr') is not None:
2794 res += ' (%5dHz)' % fdict['asr']
2795 if fdict.get('filesize') is not None:
2796 if res:
2797 res += ', '
2798 res += format_bytes(fdict['filesize'])
9732d77e
PH
2799 elif fdict.get('filesize_approx') is not None:
2800 if res:
2801 res += ', '
2802 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2803 return res
91c7271a 2804
def _format_note_table(self, f):
    """Build the NOTE column for the tabular format listing.

    Joins the non-empty pieces with ', '.
    """
    ext = f.get('ext')
    pieces = [
        'UNSUPPORTED' if ext in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        # The container is passed with (None, ext) as the values to ignore
        format_field(f, 'container', ignore=(None, ext)),
        format_field(f, 'asr', '%5dHz'),
    ]
    return ', '.join(piece for piece in pieces if piece != '')
2815
def list_formats(self, info_dict):
    """Print the table of available formats for a video.

    Formats with a 'preference' below -1000 are left out. The layout
    depends on the 'listformats_table' param.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = self.params.get('listformats_table', False)
    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    def table_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            format_field(f, 'fps', '%d'),
            '|',
            format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
            format_field(f, 'tbr', '%4dk'),
            shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
            '|',
            format_field(f, 'vcodec', default='unknown').replace('none', ''),
            format_field(f, 'vbr', '%4dk'),
            format_field(f, 'acodec', default='unknown').replace('none', ''),
            format_field(f, 'abr', '%3dk'),
            format_field(f, 'asr', '%5dHz'),
            self._format_note_table(f)]

    def plain_row(f):
        return [
            format_field(f, 'format_id'),
            format_field(f, 'ext'),
            self.format_resolution(f),
            self._format_note(f)]

    if new_format:
        table = [table_row(f) for f in listed]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        table = [plain_row(f) for f in listed]
        header_line = ['format code', 'extension', 'resolution', 'note']

    video_id = info_dict['id']
    rendered = render_table(
        header_line,
        table,
        delim=new_format,
        extraGap=(0 if new_format else 1),
        hideEmpty=new_format)
    self.to_screen('[info] Available formats for %s:\n%s' % (video_id, rendered))
cfb56d1a
PH
2859
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the video's thumbnails."""
    video_id = info_dict['id']
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % video_id)
        return

    self.to_screen('[info] Thumbnails for %s:' % video_id)
    rows = [
        [thumb['id'], thumb.get('width', 'unknown'), thumb.get('height', 'unknown'), thumb['url']]
        for thumb in thumbnails]
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 2871
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of languages and their formats for the given subtitles.

    `subtitles` maps a language to a list of dicts with an 'ext' key;
    the extensions are listed in reverse order. `name` is only used in
    the messages.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = [
        [lang, ', '.join(sub['ext'] for sub in reversed(formats))]
        for lang, formats in subtitles.items()]
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 2882
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is wrapped into a request object first
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2888
def print_debug_header(self):
    """Write the '[debug]' header lines shown in verbose mode.

    Does nothing unless the 'verbose' param is set. Otherwise reports the
    encodings in use, the program version and how it is being run, lazy
    loading / plugin extractors, the git HEAD (best effort), the Python
    version and platform, versions of external executables, the proxy map
    and, with the 'call_home' param, the public IP address.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])

    # Best effort only: any failure to query git is silently ignored
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            # Only exists on Python 2; the AttributeError elsewhere is swallowed
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Executables without a detected version are left out
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # The version check that used to follow an unconditional `return`
        # here could never run and has been dropped.
2975
def _setup_opener(self):
    """Create the cookie jar and the URL opener used for all requests.

    Sets self._socket_timeout (600 unless 'socket_timeout' is given),
    self.cookiejar (loaded from 'cookiefile' when it is readable) and
    self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    # Cookies
    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    # Proxies: an explicit 'proxy' param wins, '' meaning "no proxy at all"
    explicit_proxy = self.params.get('proxy')
    if explicit_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif explicit_proxy == '':
        proxies = {}
    else:
        proxies = {'http': explicit_proxy, 'https': explicit_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
3028
def encode(self, s):
    """Return `s` as bytes.

    Bytes are handed back untouched; anything else is encoded with the
    encoding reported by self.get_encoding(). A UnicodeEncodeError is
    re-raised after a configuration hint is appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            # Point the user at the likely cause before propagating the error
            exc.reason = exc.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise

    return s  # Already encoded
3038
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3044
de6000d9 3045 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3046 write_all = self.params.get('write_all_thumbnails', False)
3047 thumbnails = []
3048 if write_all or self.params.get('writethumbnail', False):
0202b52a 3049 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3050 multiple = write_all and len(thumbnails) > 1
ec82d85a 3051
0202b52a 3052 ret = []
6c4fd172 3053 for t in thumbnails[::1 if write_all else -1]:
ec82d85a 3054 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3055 suffix = '%s.' % t['id'] if multiple else ''
3056 thumb_display_id = '%s ' % t['id'] if multiple else ''
dcf64d43 3057 t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3058
0c3d0f51 3059 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3060 ret.append(suffix + thumb_ext)
ec82d85a
PH
3061 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3062 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3063 else:
5ef7d9bd 3064 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3065 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3066 try:
3067 uf = self.urlopen(t['url'])
d3d89c32 3068 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3069 shutil.copyfileobj(uf, thumbf)
de6000d9 3070 ret.append(suffix + thumb_ext)
ec82d85a
PH
3071 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3072 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3073 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
3074 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3075 (t['url'], error_to_compat_str(err)))
6c4fd172 3076 if ret and not write_all:
3077 break
0202b52a 3078 return ret