4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
30 from zipimport
import zipimporter
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
45 from .cookies
import load_cookies
55 DOT_DESKTOP_LINK_TEMPLATE
,
56 DOT_URL_LINK_TEMPLATE
,
57 DOT_WEBLOC_LINK_TEMPLATE
,
87 PerRequestProxyHandler
,
92 process_communicate_or_kill
,
93 register_socks_protocols
,
110 UnavailableVideoError
,
116 YoutubeDLCookieProcessor
,
118 YoutubeDLRedirectHandler
,
120 from .cache
import Cache
121 from .extractor
import (
122 gen_extractor_classes
,
127 from .extractor
.openload
import PhantomJSwrapper
128 from .downloader
import (
130 get_suitable_downloader
,
131 shorten_protocol_name
133 from .downloader
.rtmp
import rtmpdump_version
134 from .postprocessor
import (
136 FFmpegFixupDurationPP
,
139 FFmpegFixupStretchedPP
,
140 FFmpegFixupTimestampPP
,
143 MoveFilesAfterDownloadPP
,
145 from .version
import __version__
147 if compat_os_name
== 'nt':
151 class YoutubeDL(object):
154 YoutubeDL objects are the ones responsible for downloading the
155 actual video file and writing it to disk if the user has requested
156 it, among some other tasks. In most cases there should be one per
157 program. As, given a video URL, the downloader doesn't know how to
158 extract all the needed information, task that InfoExtractors do, it
159 has to pass the URL to one of them.
161 For this, YoutubeDL objects have a method that allows
162 InfoExtractors to be registered in a given order. When it is passed
163 a URL, the YoutubeDL object handles it to the first InfoExtractor it
164 finds that reports being able to handle it. The InfoExtractor extracts
165 all the information about the video or videos the URL refers to, and
166 YoutubeDL process the extracted information, possibly using a File
167 Downloader to download the video.
169 YoutubeDL objects accept a lot of parameters. In order not to saturate
170 the object constructor with arguments, it receives a dictionary of
171 options instead. These options are available through the params
172 attribute for the InfoExtractors to use. The YoutubeDL also
173 registers itself as the downloader in charge for the InfoExtractors
174 that are added to it, so this is a "mutual registration".
178 username: Username for authentication purposes.
179 password: Password for authentication purposes.
180 videopassword: Password for accessing a video.
181 ap_mso: Adobe Pass multiple-system operator identifier.
182 ap_username: Multiple-system operator account username.
183 ap_password: Multiple-system operator account password.
184 usenetrc: Use netrc for authentication instead.
185 verbose: Print additional info to stdout.
186 quiet: Do not print messages to stdout.
187 no_warnings: Do not print out anything for warnings.
188 forceprint: A list of templates to force print
189 forceurl: Force printing final URL. (Deprecated)
190 forcetitle: Force printing title. (Deprecated)
191 forceid: Force printing ID. (Deprecated)
192 forcethumbnail: Force printing thumbnail URL. (Deprecated)
193 forcedescription: Force printing description. (Deprecated)
194 forcefilename: Force printing final filename. (Deprecated)
195 forceduration: Force printing duration. (Deprecated)
196 forcejson: Force printing info_dict as JSON.
197 dump_single_json: Force printing the info_dict of the whole playlist
198 (or video) as a single JSON line.
199 force_write_download_archive: Force writing download archive regardless
200 of 'skip_download' or 'simulate'.
201 simulate: Do not download the video files. If unset (or None),
202 simulate only if listsubtitles, listformats or list_thumbnails is used
203 format: Video format code. see "FORMAT SELECTION" for more details.
204 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
205 ignore_no_formats_error: Ignore "No video formats" error. Useful for
206 extracting metadata even if the video is not actually
207 available for download (experimental)
208 format_sort: How to sort the video formats. see "Sorting Formats"
210 format_sort_force: Force the given format_sort. see "Sorting Formats"
212 allow_multiple_video_streams: Allow multiple video streams to be merged
214 allow_multiple_audio_streams: Allow multiple audio streams to be merged
216 check_formats Whether to test if the formats are downloadable.
217 Can be True (check all), False (check none)
218 or None (check only if requested by extractor)
219 paths: Dictionary of output paths. The allowed keys are 'home'
220 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
221 outtmpl: Dictionary of templates for output names. Allowed keys
222 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
223 For compatibility with youtube-dl, a single string can also be used
224 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
225 restrictfilenames: Do not allow "&" and spaces in file names
226 trim_file_name: Limit length of filename (extension excluded)
227 windowsfilenames: Force the filenames to be windows compatible
228 ignoreerrors: Do not stop on download errors
229 (Default True when running yt-dlp,
230 but False when directly accessing YoutubeDL class)
231 skip_playlist_after_errors: Number of allowed failures until the rest of
232 the playlist is skipped
233 force_generic_extractor: Force downloader to use the generic extractor
234 overwrites: Overwrite all video and metadata files if True,
235 overwrite only non-video files if None
236 and don't overwrite any file if False
237 For compatibility with youtube-dl,
238 "nooverwrites" may also be used instead
239 playliststart: Playlist item to start at.
240 playlistend: Playlist item to end at.
241 playlist_items: Specific indices of playlist to download.
242 playlistreverse: Download playlist items in reverse order.
243 playlistrandom: Download playlist items in random order.
244 matchtitle: Download only matching titles.
245 rejecttitle: Reject downloads for matching titles.
246 logger: Log messages to a logging.Logger instance.
247 logtostderr: Log messages to stderr instead of stdout.
248 writedescription: Write the video description to a .description file
249 writeinfojson: Write the video description to a .info.json file
250 clean_infojson: Remove private fields from the infojson
251 getcomments: Extract video comments. This will not be written to disk
252 unless writeinfojson is also given
253 writeannotations: Write the video annotations to a .annotations.xml file
254 writethumbnail: Write the thumbnail image to a file
255 allow_playlist_files: Whether to write playlists' description, infojson etc
256 also to disk when using the 'write*' options
257 write_all_thumbnails: Write all thumbnail formats to files
258 writelink: Write an internet shortcut file, depending on the
259 current platform (.url/.webloc/.desktop)
260 writeurllink: Write a Windows internet shortcut file (.url)
261 writewebloclink: Write a macOS internet shortcut file (.webloc)
262 writedesktoplink: Write a Linux internet shortcut file (.desktop)
263 writesubtitles: Write the video subtitles to a file
264 writeautomaticsub: Write the automatically generated subtitles to a file
265 allsubtitles: Deprecated - Use subtitleslangs = ['all']
266 Downloads all the subtitles of the video
267 (requires writesubtitles or writeautomaticsub)
268 listsubtitles: Lists all available subtitles for the video
269 subtitlesformat: The format code for subtitles
270 subtitleslangs: List of languages of the subtitles to download (can be regex).
271 The list may contain "all" to refer to all the available
272 subtitles. The language can be prefixed with a "-" to
273 exclude it from the requested languages. Eg: ['all', '-live_chat']
274 keepvideo: Keep the video file after post-processing
275 daterange: A DateRange object, download only if the upload_date is in the range.
276 skip_download: Skip the actual download of the video file
277 cachedir: Location of the cache files in the filesystem.
278 False to disable filesystem cache.
279 noplaylist: Download single video instead of a playlist if in doubt.
280 age_limit: An integer representing the user's age in years.
281 Unsuitable videos for the given age are skipped.
282 min_views: An integer representing the minimum view count the video
283 must have in order to not be skipped.
284 Videos without view count information are always
285 downloaded. None for no limit.
286 max_views: An integer representing the maximum view count.
287 Videos that are more popular than that are not
289 Videos without view count information are always
290 downloaded. None for no limit.
291 download_archive: File name of a file where all downloads are recorded.
292 Videos already present in the file are not downloaded
294 break_on_existing: Stop the download process after attempting to download a
295 file that is in the archive.
296 break_on_reject: Stop the download process when encountering a video that
297 has been filtered out.
298 cookiefile: File name where cookies should be read from and dumped to
299 cookiesfrombrowser: A tuple containing the name of the browser and the profile
300 name/path from where cookies are loaded.
301 Eg: ('chrome', ) or ('vivaldi', 'default')
302 nocheckcertificate:Do not verify SSL certificates
303 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
304 At the moment, this is only supported by YouTube.
305 proxy: URL of the proxy server to use
306 geo_verification_proxy: URL of the proxy to use for IP address verification
307 on geo-restricted sites.
308 socket_timeout: Time to wait for unresponsive hosts, in seconds
309 bidi_workaround: Work around buggy terminals without bidirectional text
310 support, using fribidi
311 debug_printtraffic:Print out sent and received HTTP traffic
312 include_ads: Download ads as well
313 default_search: Prepend this string if an input url is not valid.
314 'auto' for elaborate guessing
315 encoding: Use this encoding instead of the system-specified.
316 extract_flat: Do not resolve URLs, return the immediate result.
317 Pass in 'in_playlist' to only show this behavior for
319 postprocessors: A list of dictionaries, each with an entry
320 * key: The name of the postprocessor. See
321 yt_dlp/postprocessor/__init__.py for a list.
322 * when: When to run the postprocessor. Can be one of
323 pre_process|before_dl|post_process|after_move.
324 Assumed to be 'post_process' if not given
325 post_hooks: A list of functions that get called as the final step
326 for each video file, after all postprocessors have been
327 called. The filename will be passed as the only argument.
328 progress_hooks: A list of functions that get called on download
329 progress, with a dictionary with the entries
330 * status: One of "downloading", "error", or "finished".
331 Check this first and ignore unknown values.
332 * info_dict: The extracted info_dict
334 If status is one of "downloading", or "finished", the
335 following properties may also be present:
336 * filename: The final filename (always present)
337 * tmpfilename: The filename we're currently writing to
338 * downloaded_bytes: Bytes on disk
339 * total_bytes: Size of the whole file, None if unknown
340 * total_bytes_estimate: Guess of the eventual file size,
342 * elapsed: The number of seconds since download started.
343 * eta: The estimated time in seconds, None if unknown
344 * speed: The download speed in bytes/second, None if
346 * fragment_index: The counter of the currently
347 downloaded video fragment.
348 * fragment_count: The number of fragments (= individual
349 files that will be merged)
351 Progress hooks are guaranteed to be called at least once
352 (with status "finished") if the download is successful.
353 merge_output_format: Extension to use when merging formats.
354 final_ext: Expected final extension; used to detect when the file was
355 already downloaded and converted. "merge_output_format" is
356 replaced by this extension when given
357 fixup: Automatically correct known faults of the file.
359 - "never": do nothing
360 - "warn": only emit a warning
361 - "detect_or_warn": check whether we can do anything
362 about it, warn otherwise (default)
363 source_address: Client-side IP address to bind to.
364 call_home: Boolean, true iff we are allowed to contact the
365 yt-dlp servers for debugging. (BROKEN)
366 sleep_interval_requests: Number of seconds to sleep between requests
368 sleep_interval: Number of seconds to sleep before each download when
369 used alone or a lower bound of a range for randomized
370 sleep before each download (minimum possible number
371 of seconds to sleep) when used along with
373 max_sleep_interval:Upper bound of a range for randomized sleep before each
374 download (maximum possible number of seconds to sleep).
375 Must only be used along with sleep_interval.
376 Actual sleep time will be a random float from range
377 [sleep_interval; max_sleep_interval].
378 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
379 listformats: Print an overview of available video formats and exit.
380 list_thumbnails: Print a table of all thumbnails and exit.
381 match_filter: A function that gets called with the info_dict of
383 If it returns a message, the video is ignored.
384 If it returns None, the video is downloaded.
385 match_filter_func in utils.py is one example for this.
386 no_color: Do not emit color codes in output.
387 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
390 Two-letter ISO 3166-2 country code that will be used for
391 explicit geographic restriction bypassing via faking
392 X-Forwarded-For HTTP header
394 IP range in CIDR notation that will be used similarly to
397 The following options determine which downloader is picked:
398 external_downloader: A dictionary of protocol keys and the executable of the
399 external downloader to use for it. The allowed protocols
400 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
401 Set the value to 'native' to use the native downloader
402 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
403 or {'m3u8': 'ffmpeg'} instead.
404 Use the native HLS downloader instead of ffmpeg/avconv
405 if True, otherwise use ffmpeg/avconv if False, otherwise
406 use downloader suggested by extractor if None.
407 compat_opts: Compatibility options. See "Differences in default behavior".
408 The following options do not work when used through the API:
409 filename, abort-on-error, multistreams, no-live-chat,
410 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
411 Refer __init__.py for their implementation
413 The following parameters are not used by YoutubeDL itself, they are used by
414 the downloader (see yt_dlp/downloader/common.py):
415 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
416 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
417 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
419 The following options are used by the post processors:
420 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
421 otherwise prefer ffmpeg. (avconv support is deprecated)
422 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
423 to the binary or its containing directory.
424 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
425 and a list of additional command-line arguments for the
426 postprocessor/executable. The dict can also have "PP+EXE" keys
427 which are used when the given exe is used by the given PP.
428 Use 'default' as the name for arguments to be passed to all PP
429 For compatibility with youtube-dl, a single list of args
432 The following options are used by the extractors:
433 extractor_retries: Number of times to retry for known errors
434 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
435 hls_split_discontinuity: Split HLS playlists to different formats at
436 discontinuities such as ad breaks (default: False)
437 extractor_args: A dictionary of arguments to be passed to the extractors.
438 See "EXTRACTOR ARGUMENTS" for details.
439 Eg: {'youtube': {'skip': ['dash', 'hls']}}
440 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
441 If True (default), DASH manifests and related
442 data will be downloaded and processed by extractor.
443 You can reduce network I/O by disabling it if you don't
444 care about DASH. (only for youtube)
445 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
446 If True (default), HLS manifests and related
447 data will be downloaded and processed by extractor.
448 You can reduce network I/O by disabling it if you don't
449 care about HLS. (only for youtube)
452 _NUMERIC_FIELDS
= set((
453 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
454 'timestamp', 'upload_year', 'upload_month', 'upload_day',
455 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
456 'average_rating', 'comment_count', 'age_limit',
457 'start_time', 'end_time',
458 'chapter_number', 'season_number', 'episode_number',
459 'track_number', 'disc_number', 'release_year',
465 _pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
466 _printed_messages
= set()
467 _first_webpage_request
= True
468 _download_retcode
= None
469 _num_downloads
= None
471 _playlist_urls
= set()
474 def __init__(self
, params
=None, auto_init
=True):
475 """Create a FileDownloader object with the given options."""
479 self
._ies
_instances
= {}
480 self
._pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
481 self
._printed
_messages
= set()
482 self
._first
_webpage
_request
= True
483 self
._post
_hooks
= []
484 self
._progress
_hooks
= []
485 self
._download
_retcode
= 0
486 self
._num
_downloads
= 0
487 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
488 self
._err
_file
= sys
.stderr
491 'nocheckcertificate': False,
493 self
.params
.update(params
)
494 self
.cache
= Cache(self
)
496 if sys
.version_info
< (3, 6):
498 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys
.version_info
[:2])
500 if self
.params
.get('allow_unplayable_formats'):
502 'You have asked for unplayable formats to be listed/downloaded. '
503 'This is a developer option intended for debugging. '
504 'If you experience any issues while using this option, DO NOT open a bug report')
506 def check_deprecated(param
, option
, suggestion
):
507 if self
.params
.get(param
) is not None:
508 self
.report_warning('%s is deprecated. Use %s instead' % (option
, suggestion
))
512 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
513 if self
.params
.get('geo_verification_proxy') is None:
514 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
516 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
517 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
518 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
520 for msg
in self
.params
.get('warnings', []):
521 self
.report_warning(msg
)
523 if self
.params
.get('overwrites') is None:
524 self
.params
.pop('overwrites', None)
525 elif self
.params
.get('nooverwrites') is not None:
526 # nooverwrites was unnecessarily changed to overwrites
527 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
528 # This ensures compatibility with both keys
529 self
.params
['overwrites'] = not self
.params
['nooverwrites']
531 self
.params
['nooverwrites'] = not self
.params
['overwrites']
533 if params
.get('bidi_workaround', False):
536 master
, slave
= pty
.openpty()
537 width
= compat_get_terminal_size().columns
541 width_args
= ['-w', str(width
)]
543 stdin
=subprocess
.PIPE
,
545 stderr
=self
._err
_file
)
547 self
._output
_process
= subprocess
.Popen(
548 ['bidiv'] + width_args
, **sp_kwargs
551 self
._output
_process
= subprocess
.Popen(
552 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
553 self
._output
_channel
= os
.fdopen(master
, 'rb')
554 except OSError as ose
:
555 if ose
.errno
== errno
.ENOENT
:
556 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
560 if (sys
.platform
!= 'win32'
561 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
562 and not params
.get('restrictfilenames', False)):
563 # Unicode filesystem API will throw errors (#1474, #13027)
565 'Assuming --restrict-filenames since file system encoding '
566 'cannot encode all characters. '
567 'Set the LC_ALL environment variable to fix this.')
568 self
.params
['restrictfilenames'] = True
570 self
.outtmpl_dict
= self
.parse_outtmpl()
572 # Creating format selector here allows us to catch syntax errors before the extraction
573 self
.format_selector
= (
574 None if self
.params
.get('format') is None
575 else self
.build_format_selector(self
.params
['format']))
579 """Preload the archive, if any is specified"""
580 def preload_download_archive(fn
):
583 self
.write_debug('Loading archive file %r\n' % fn
)
585 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
586 for line
in archive_file
:
587 self
.archive
.add(line
.strip())
588 except IOError as ioe
:
589 if ioe
.errno
!= errno
.ENOENT
:
595 preload_download_archive(self
.params
.get('download_archive'))
598 self
.print_debug_header()
599 self
.add_default_info_extractors()
601 for pp_def_raw
in self
.params
.get('postprocessors', []):
602 pp_def
= dict(pp_def_raw
)
603 when
= pp_def
.pop('when', 'post_process')
604 pp_class
= get_postprocessor(pp_def
.pop('key'))
605 pp
= pp_class(self
, **compat_kwargs(pp_def
))
606 self
.add_post_processor(pp
, when
=when
)
608 for ph
in self
.params
.get('post_hooks', []):
609 self
.add_post_hook(ph
)
611 for ph
in self
.params
.get('progress_hooks', []):
612 self
.add_progress_hook(ph
)
614 register_socks_protocols()
616 def warn_if_short_id(self
, argv
):
617 # short YouTube ID starting with dash?
619 i
for i
, a
in enumerate(argv
)
620 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
624 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
625 + ['--'] + [argv
[i
] for i
in idxs
]
628 'Long argument string detected. '
629 'Use -- to separate parameters and URLs, like this:\n%s\n' %
630 args_to_str(correct_argv
))
632 def add_info_extractor(self
, ie
):
633 """Add an InfoExtractor object to the end of the list."""
635 self
._ies
[ie_key
] = ie
636 if not isinstance(ie
, type):
637 self
._ies
_instances
[ie_key
] = ie
638 ie
.set_downloader(self
)
640 def _get_info_extractor_class(self
, ie_key
):
641 ie
= self
._ies
.get(ie_key
)
643 ie
= get_info_extractor(ie_key
)
644 self
.add_info_extractor(ie
)
647 def get_info_extractor(self
, ie_key
):
649 Get an instance of an IE with name ie_key, it will try to get one from
650 the _ies list, if there's no instance it will create a new one and add
651 it to the extractor list.
653 ie
= self
._ies
_instances
.get(ie_key
)
655 ie
= get_info_extractor(ie_key
)()
656 self
.add_info_extractor(ie
)
def add_default_info_extractors(self):
    """Register every InfoExtractor from gen_extractor_classes(), in order."""
    for extractor in gen_extractor_classes():
        self.add_info_extractor(extractor)
666 def add_post_processor(self
, pp
, when
='post_process'):
667 """Add a PostProcessor object to the end of the chain."""
668 self
._pps
[when
].append(pp
)
669 pp
.set_downloader(self
)
671 def add_post_hook(self
, ph
):
672 """Add the post hook"""
673 self
._post
_hooks
.append(ph
)
675 def add_progress_hook(self
, ph
):
676 """Add the progress hook (currently only for the file downloader)"""
677 self
._progress
_hooks
.append(ph
)
679 def _bidi_workaround(self
, message
):
680 if not hasattr(self
, '_output_channel'):
683 assert hasattr(self
, '_output_process')
684 assert isinstance(message
, compat_str
)
685 line_count
= message
.count('\n') + 1
686 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
687 self
._output
_process
.stdin
.flush()
688 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
689 for _
in range(line_count
))
690 return res
[:-len('\n')]
692 def _write_string(self
, message
, out
=None, only_once
=False):
694 if message
in self
._printed
_messages
:
696 self
._printed
_messages
.add(message
)
697 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
699 def to_stdout(self
, message
, skip_eol
=False, quiet
=False):
700 """Print message to stdout"""
701 if self
.params
.get('logger'):
702 self
.params
['logger'].debug(message
)
703 elif not quiet
or self
.params
.get('verbose'):
705 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
706 self
._err
_file
if quiet
else self
._screen
_file
)
708 def to_stderr(self
, message
, only_once
=False):
709 """Print message to stderr"""
710 assert isinstance(message
, compat_str
)
711 if self
.params
.get('logger'):
712 self
.params
['logger'].error(message
)
714 self
._write
_string
('%s\n' % self
._bidi
_workaround
(message
), self
._err
_file
, only_once
=only_once
)
716 def to_console_title(self
, message
):
717 if not self
.params
.get('consoletitle', False):
719 if compat_os_name
== 'nt':
720 if ctypes
.windll
.kernel32
.GetConsoleWindow():
721 # c_wchar_p() might not be necessary if `message` is
722 # already of type unicode()
723 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
724 elif 'TERM' in os
.environ
:
725 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
727 def save_console_title(self
):
728 if not self
.params
.get('consoletitle', False):
730 if self
.params
.get('simulate'):
732 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
733 # Save the title on stack
734 self
._write
_string
('\033[22;0t', self
._screen
_file
)
736 def restore_console_title(self
):
737 if not self
.params
.get('consoletitle', False):
739 if self
.params
.get('simulate'):
741 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
742 # Restore the title from stack
743 self
._write
_string
('\033[23;0t', self
._screen
_file
)
746 self
.save_console_title()
749 def __exit__(self
, *args
):
750 self
.restore_console_title()
752 if self
.params
.get('cookiefile') is not None:
753 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
755 def trouble(self
, message
=None, tb
=None):
756 """Determine action to take when a download problem appears.
758 Depending on if the downloader has been configured to ignore
759 download errors or not, this method may throw an exception or
760 not when errors are found, after printing the message.
762 tb, if given, is additional traceback information.
764 if message
is not None:
765 self
.to_stderr(message
)
766 if self
.params
.get('verbose'):
768 if sys
.exc_info()[0]: # if .trouble has been called from an except block
770 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
771 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
772 tb
+= encode_compat_str(traceback
.format_exc())
774 tb_data
= traceback
.format_list(traceback
.extract_stack())
775 tb
= ''.join(tb_data
)
778 if not self
.params
.get('ignoreerrors', False):
779 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
780 exc_info
= sys
.exc_info()[1].exc_info
782 exc_info
= sys
.exc_info()
783 raise DownloadError(message
, exc_info
)
784 self
._download
_retcode
= 1
786 def to_screen(self
, message
, skip_eol
=False):
787 """Print message to stdout if not in quiet mode"""
789 message
, skip_eol
, quiet
=self
.params
.get('quiet', False))
791 def report_warning(self
, message
, only_once
=False):
793 Print the message to stderr, it will be prefixed with 'WARNING:'
794 If stderr is a tty file the 'WARNING:' will be colored
796 if self
.params
.get('logger') is not None:
797 self
.params
['logger'].warning(message
)
799 if self
.params
.get('no_warnings'):
801 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
802 _msg_header
= '\033[0;33mWARNING:\033[0m'
804 _msg_header
= 'WARNING:'
805 warning_message
= '%s %s' % (_msg_header
, message
)
806 self
.to_stderr(warning_message
, only_once
)
808 def report_error(self
, message
, tb
=None):
810 Do the same as trouble, but prefixes the message with 'ERROR:', colored
811 in red if stderr is a tty file.
813 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
814 _msg_header
= '\033[0;31mERROR:\033[0m'
816 _msg_header
= 'ERROR:'
817 error_message
= '%s %s' % (_msg_header
, message
)
818 self
.trouble(error_message
, tb
)
820 def write_debug(self
, message
, only_once
=False):
821 '''Log debug message or Print message to stderr'''
822 if not self
.params
.get('verbose', False):
824 message
= '[debug] %s' % message
825 if self
.params
.get('logger'):
826 self
.params
['logger'].debug(message
)
828 self
.to_stderr(message
, only_once
)
830 def report_file_already_downloaded(self
, file_name
):
831 """Report file has already been fully downloaded."""
833 self
.to_screen('[download] %s has already been downloaded' % file_name
)
834 except UnicodeEncodeError:
835 self
.to_screen('[download] The file has already been downloaded')
837 def report_file_delete(self
, file_name
):
838 """Report that existing file will be deleted."""
840 self
.to_screen('Deleting existing file %s' % file_name
)
841 except UnicodeEncodeError:
842 self
.to_screen('Deleting existing file')
def raise_no_formats(self, info, forced=False):
    """Raise ExtractorError for a video with no formats, or just warn
    when the caller has declared such errors expected
    (``ignore_no_formats_error``) and *forced* is false."""
    has_drm = info.get('__has_drm')
    message = 'This video is DRM protected' if has_drm else 'No video formats found!'
    expected = self.params.get('ignore_no_formats_error')
    if forced or not expected:
        raise ExtractorError(
            message, video_id=info['id'], ie=info['extractor'],
            expected=has_drm or expected)
    self.report_warning(message)
854 def parse_outtmpl(self
):
855 outtmpl_dict
= self
.params
.get('outtmpl', {})
856 if not isinstance(outtmpl_dict
, dict):
857 outtmpl_dict
= {'default': outtmpl_dict}
858 outtmpl_dict
.update({
859 k
: v
for k
, v
in DEFAULT_OUTTMPL
.items()
860 if not outtmpl_dict
.get(k
)})
861 for key
, val
in outtmpl_dict
.items():
862 if isinstance(val
, bytes):
864 'Parameter outtmpl is bytes, but should be a unicode string. '
865 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
868 def get_output_path(self
, dir_type
='', filename
=None):
869 paths
= self
.params
.get('paths', {})
870 assert isinstance(paths
, dict)
872 expand_path(paths
.get('home', '').strip()),
873 expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else '',
876 # Temporary fix for #4787
877 # 'Treat' all problem characters by passing filename through preferredencoding
878 # to workaround encoding issues with subprocess on python2 @ Windows
879 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
880 path
= encodeFilename(path
, True).decode(preferredencoding())
881 return sanitize_path(path
, force
=self
.params
.get('windowsfilenames'))
def _outtmpl_expandpath(outtmpl):
    # expand_path translates '%%' into '%' and '$$' into '$'; that is not
    # what we want, since '%%' must survive intact for the later template
    # dict substitution step.  Shield both with a random boundary-like
    # separator that is stripped out again afterwards.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    shielded = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

    # outtmpl must be expand_path'ed *before* template dict substitution,
    # because meta fields may contain env-variable-looking text we do not
    # want expanded (e.g. a title like "Hello $PATH").
    return expand_path(shielded).replace(sep, '')
899 def escape_outtmpl(outtmpl
):
900 ''' Escape any remaining strings like %s, %abc% etc. '''
902 STR_FORMAT_RE_TMPL
.format('', '(?![%(\0])'),
903 lambda mobj
: ('' if mobj
.group('has_key') else '%') + mobj
.group(0),
907 def validate_outtmpl(cls
, outtmpl
):
908 ''' @return None or Exception object '''
910 STR_FORMAT_RE_TMPL
.format('[^)]*', '[ljq]'),
911 lambda mobj
: f
'{mobj.group(0)[:-1]}s',
912 cls
._outtmpl
_expandpath
(outtmpl
))
914 cls
.escape_outtmpl(outtmpl
) % collections
.defaultdict(int)
916 except ValueError as err
:
919 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=None):
920 """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
921 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
923 info_dict
= dict(info_dict
) # Do not sanitize so as not to consume LazyList
924 for key
in ('__original_infodict', '__postprocessors'):
925 info_dict
.pop(key
, None)
926 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
927 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
928 if info_dict
.get('duration', None) is not None
930 info_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
931 if info_dict
.get('resolution') is None:
932 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
934 # For fields playlist_index and autonumber convert all occurrences
935 # of %(field)s to %(field)0Nd for backward compatibility
936 field_size_compat_map
= {
937 'playlist_index': len(str(info_dict
.get('_last_playlist_index') or '')),
938 'autonumber': self
.params
.get('autonumber_size') or 5,
942 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljq]'))
947 # Field is of the form key1.key2...
948 # where keys (except first) can be string, int or slice
949 FIELD_RE
= r
'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num
=r
'(?:-?\d+)')
950 MATH_FIELD_RE
= r
'''{field}|{num}'''.format(field
=FIELD_RE
, num
=r
'-?\d+(?:.\d+)?')
951 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
952 INTERNAL_FORMAT_RE
= re
.compile(r
'''(?x)
955 (?P<maths>(?:{math_op}{math_field})*)
956 (?:>(?P<strf_format>.+?))?
957 (?:\|(?P<default>.*?))?
958 $'''.format(field
=FIELD_RE
, math_op
=MATH_OPERATORS_RE
, math_field
=MATH_FIELD_RE
))
960 def _traverse_infodict(k
):
964 return traverse_obj(info_dict
, k
, is_user_input
=True, traverse_string
=True)
966 def get_value(mdict
):
968 value
= _traverse_infodict(mdict
['fields'])
971 value
= float_or_none(value
)
972 if value
is not None:
975 offset_key
= mdict
['maths']
977 value
= float_or_none(value
)
981 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
983 offset_key
= offset_key
[len(item
):]
985 operator
= MATH_FUNCTIONS
[item
]
987 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
988 offset
= float_or_none(item
)
990 offset
= float_or_none(_traverse_infodict(item
))
992 value
= operator(value
, multiplier
* offset
)
993 except (TypeError, ZeroDivisionError):
996 # Datetime formatting
997 if mdict
['strf_format']:
998 value
= strftime_or_none(value
, mdict
['strf_format'])
1002 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
1004 def _dumpjson_default(obj
):
1005 if isinstance(obj
, (set, LazyList
)):
1007 raise TypeError(f
'Object of type {type(obj).__name__} is not JSON serializable')
1009 def create_key(outer_mobj
):
1010 if not outer_mobj
.group('has_key'):
1011 return f
'%{outer_mobj.group(0)}'
1012 key
= outer_mobj
.group('key')
1013 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
1015 value
, default
, mobj
= None, na
, {'fields': ''}
1017 mobj
= mobj
.groupdict()
1018 default
= mobj
['default'] if mobj
['default'] is not None else na
1019 value
= get_value(mobj
)
1021 fmt
= outer_mobj
.group('format')
1022 if fmt
== 's' and value
is not None and key
in field_size_compat_map
.keys():
1023 fmt
= '0{:d}d'.format(field_size_compat_map
[key
])
1025 value
= default
if value
is None else value
1027 str_fmt
= f
'{fmt[:-1]}s'
1029 value
, fmt
= ', '.join(variadic(value
)), str_fmt
1030 elif fmt
[-1] == 'j':
1031 value
, fmt
= json
.dumps(value
, default
=_dumpjson_default
), str_fmt
1032 elif fmt
[-1] == 'q':
1033 value
, fmt
= compat_shlex_quote(str(value
)), str_fmt
1034 elif fmt
[-1] == 'c':
1037 value
, fmt
= default
, 's'
1040 elif fmt
[-1] not in 'rs': # numeric
1041 value
= float_or_none(value
)
1043 value
, fmt
= default
, 's'
1047 # If value is an object, sanitize might convert it to a string
1048 # So we convert it to repr first
1049 value
, fmt
= repr(value
), str_fmt
1050 if fmt
[-1] in 'csr':
1051 value
= sanitize(mobj
['fields'].split('.')[-1], value
)
1053 key
= '%s\0%s' % (key
.replace('%', '%\0'), outer_mobj
.group('format'))
1054 TMPL_DICT
[key
] = value
1055 return '{prefix}%({key}){fmt}'.format(key
=key
, fmt
=fmt
, prefix
=outer_mobj
.group('prefix'))
1057 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
1059 def _prepare_filename(self
, info_dict
, tmpl_type
='default'):
1061 sanitize
= lambda k
, v
: sanitize_filename(
1063 restricted
=self
.params
.get('restrictfilenames'),
1064 is_id
=(k
== 'id' or k
.endswith('_id')))
1065 outtmpl
= self
.outtmpl_dict
.get(tmpl_type
, self
.outtmpl_dict
['default'])
1066 outtmpl
, template_dict
= self
.prepare_outtmpl(outtmpl
, info_dict
, sanitize
)
1067 outtmpl
= self
.escape_outtmpl(self
._outtmpl
_expandpath
(outtmpl
))
1068 filename
= outtmpl
% template_dict
1070 force_ext
= OUTTMPL_TYPES
.get(tmpl_type
)
1071 if force_ext
is not None:
1072 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1074 # https://github.com/blackjack4494/youtube-dlc/issues/85
1075 trim_file_name
= self
.params
.get('trim_file_name', False)
1077 fn_groups
= filename
.rsplit('.')
1080 if len(fn_groups
) > 2:
1081 sub_ext
= fn_groups
[-2]
1082 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
1085 except ValueError as err
:
1086 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
1089 def prepare_filename(self
, info_dict
, dir_type
='', warn
=False):
1090 """Generate the output filename."""
1092 filename
= self
._prepare
_filename
(info_dict
, dir_type
or 'default')
1095 if not self
.params
.get('paths'):
1097 elif filename
== '-':
1098 self
.report_warning('--paths is ignored when an outputting to stdout', only_once
=True)
1099 elif os
.path
.isabs(filename
):
1100 self
.report_warning('--paths is ignored since an absolute path is given in output template', only_once
=True)
1101 if filename
== '-' or not filename
:
1104 return self
.get_output_path(dir_type
, filename
)
1106 def _match_entry(self
, info_dict
, incomplete
=False, silent
=False):
1107 """ Returns None if the file should be downloaded """
1109 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
1112 if 'title' in info_dict
:
1113 # This can happen when we're just evaluating the playlist
1114 title
= info_dict
['title']
1115 matchtitle
= self
.params
.get('matchtitle', False)
1117 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
1118 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
1119 rejecttitle
= self
.params
.get('rejecttitle', False)
1121 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
1122 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
1123 date
= info_dict
.get('upload_date')
1124 if date
is not None:
1125 dateRange
= self
.params
.get('daterange', DateRange())
1126 if date
not in dateRange
:
1127 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
1128 view_count
= info_dict
.get('view_count')
1129 if view_count
is not None:
1130 min_views
= self
.params
.get('min_views')
1131 if min_views
is not None and view_count
< min_views
:
1132 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
1133 max_views
= self
.params
.get('max_views')
1134 if max_views
is not None and view_count
> max_views
:
1135 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
1136 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
1137 return 'Skipping "%s" because it is age restricted' % video_title
1139 match_filter
= self
.params
.get('match_filter')
1140 if match_filter
is not None:
1142 ret
= match_filter(info_dict
, incomplete
=incomplete
)
1144 # For backward compatibility
1145 ret
= None if incomplete
else match_filter(info_dict
)
1150 if self
.in_download_archive(info_dict
):
1151 reason
= '%s has already been recorded in the archive' % video_title
1152 break_opt
, break_err
= 'break_on_existing', ExistingVideoReached
1154 reason
= check_filter()
1155 break_opt
, break_err
= 'break_on_reject', RejectedVideoReached
1156 if reason
is not None:
1158 self
.to_screen('[download] ' + reason
)
1159 if self
.params
.get(break_opt
, False):
1164 def add_extra_info(info_dict
, extra_info
):
1165 '''Set the keys from extra_info in info dict if they are missing'''
1166 for key
, value
in extra_info
.items():
1167 info_dict
.setdefault(key
, value
)
1169 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
=None,
1170 process
=True, force_generic_extractor
=False):
1172 Return a list with a dictionary for each video extracted.
1175 url -- URL to extract
1178 download -- whether to download videos during extraction
1179 ie_key -- extractor key hint
1180 extra_info -- dictionary containing the extra values to add to each result
1181 process -- whether to resolve all unresolved references (URLs, playlist items),
1182 must be True for download to work.
1183 force_generic_extractor -- force using the generic extractor
1186 if extra_info
is None:
1189 if not ie_key
and force_generic_extractor
:
1193 ies
= {ie_key: self._get_info_extractor_class(ie_key)}
1197 for ie_key
, ie
in ies
.items():
1198 if not ie
.suitable(url
):
1201 if not ie
.working():
1202 self
.report_warning('The program functionality for this site has been marked as broken, '
1203 'and will probably not work.')
1205 temp_id
= ie
.get_temp_id(url
)
1206 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
1207 self
.to_screen("[%s] %s: has already been recorded in archive" % (
1210 return self
.__extract
_info
(url
, self
.get_info_extractor(ie_key
), download
, extra_info
, process
)
1212 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
1214 def __handle_extraction_exceptions(func
):
1216 def wrapper(self
, *args
, **kwargs
):
1218 return func(self
, *args
, **kwargs
)
1219 except GeoRestrictedError
as e
:
1222 msg
+= '\nThis video is available in %s.' % ', '.join(
1223 map(ISO3166Utils
.short2full
, e
.countries
))
1224 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1225 self
.report_error(msg
)
1226 except ExtractorError
as e
: # An error we somewhat expected
1227 self
.report_error(compat_str(e
), e
.format_traceback())
1228 except ThrottledDownload
:
1229 self
.to_stderr('\r')
1230 self
.report_warning('The download speed is below throttle limit. Re-extracting data')
1231 return wrapper(self
, *args
, **kwargs
)
1232 except (MaxDownloadsReached
, ExistingVideoReached
, RejectedVideoReached
, LazyList
.IndexError):
1234 except Exception as e
:
1235 if self
.params
.get('ignoreerrors', False):
1236 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1241 @__handle_extraction_exceptions
1242 def __extract_info(self
, url
, ie
, download
, extra_info
, process
):
1243 ie_result
= ie
.extract(url
)
1244 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1246 if isinstance(ie_result
, list):
1247 # Backwards compatibility: old IE result format
1249 '_type': 'compat_list',
1250 'entries': ie_result
,
1252 if extra_info
.get('original_url'):
1253 ie_result
.setdefault('original_url', extra_info
['original_url'])
1254 self
.add_default_extra_info(ie_result
, ie
, url
)
1256 return self
.process_ie_result(ie_result
, download
, extra_info
)
1260 def add_default_extra_info(self
, ie_result
, ie
, url
):
1262 self
.add_extra_info(ie_result
, {
1264 'original_url': url
,
1265 'webpage_url_basename': url_basename(url
),
1268 self
.add_extra_info(ie_result
, {
1269 'extractor': ie
.IE_NAME
,
1270 'extractor_key': ie
.ie_key(),
1273 def process_ie_result(self
, ie_result
, download
=True, extra_info
=None):
1275 Take the result of the ie(may be modified) and resolve all unresolved
1276 references (URLs, playlist items).
1278 It will also download the videos if 'download'.
1279 Returns the resolved ie_result.
1281 if extra_info
is None:
1283 result_type
= ie_result
.get('_type', 'video')
1285 if result_type
in ('url', 'url_transparent'):
1286 ie_result
['url'] = sanitize_url(ie_result
['url'])
1287 if ie_result
.get('original_url'):
1288 extra_info
.setdefault('original_url', ie_result
['original_url'])
1290 extract_flat
= self
.params
.get('extract_flat', False)
1291 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1292 or extract_flat
is True):
1293 info_copy
= ie_result
.copy()
1294 self
.add_extra_info(info_copy
, extra_info
)
1295 ie
= try_get(ie_result
.get('ie_key'), self
.get_info_extractor
)
1296 self
.add_default_extra_info(info_copy
, ie
, ie_result
['url'])
1297 self
.__forced
_printings
(info_copy
, self
.prepare_filename(info_copy
), incomplete
=True)
1300 if result_type
== 'video':
1301 self
.add_extra_info(ie_result
, extra_info
)
1302 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1303 additional_urls
= (ie_result
or {}).get('additional_urls')
1305 # TODO: Improve MetadataParserPP to allow setting a list
1306 if isinstance(additional_urls
, compat_str
):
1307 additional_urls
= [additional_urls
]
1309 '[info] %s: %d additional URL(s) requested' % (ie_result
['id'], len(additional_urls
)))
1310 self
.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls
))
1311 ie_result
['additional_entries'] = [
1313 url
, download
, extra_info
,
1314 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1315 for url
in additional_urls
1318 elif result_type
== 'url':
1319 # We have to add extra_info to the results because it may be
1320 # contained in a playlist
1321 return self
.extract_info(
1322 ie_result
['url'], download
,
1323 ie_key
=ie_result
.get('ie_key'),
1324 extra_info
=extra_info
)
1325 elif result_type
== 'url_transparent':
1326 # Use the information from the embedding page
1327 info
= self
.extract_info(
1328 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1329 extra_info
=extra_info
, download
=False, process
=False)
1331 # extract_info may return None when ignoreerrors is enabled and
1332 # extraction failed with an error, don't crash and return early
1337 force_properties
= dict(
1338 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
1339 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1340 if f
in force_properties
:
1341 del force_properties
[f
]
1342 new_result
= info
.copy()
1343 new_result
.update(force_properties
)
1345 # Extracted info may not be a video result (i.e.
1346 # info.get('_type', 'video') != video) but rather an url or
1347 # url_transparent. In such cases outer metadata (from ie_result)
1348 # should be propagated to inner one (info). For this to happen
1349 # _type of info should be overridden with url_transparent. This
1350 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1351 if new_result
.get('_type') == 'url':
1352 new_result
['_type'] = 'url_transparent'
1354 return self
.process_ie_result(
1355 new_result
, download
=download
, extra_info
=extra_info
)
1356 elif result_type
in ('playlist', 'multi_video'):
1357 # Protect from infinite recursion due to recursively nested playlists
1358 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1359 webpage_url
= ie_result
['webpage_url']
1360 if webpage_url
in self
._playlist
_urls
:
1362 '[download] Skipping already downloaded playlist: %s'
1363 % ie_result
.get('title') or ie_result
.get('id'))
1366 self
._playlist
_level
+= 1
1367 self
._playlist
_urls
.add(webpage_url
)
1368 self
._sanitize
_thumbnails
(ie_result
)
1370 return self
.__process
_playlist
(ie_result
, download
)
1372 self
._playlist
_level
-= 1
1373 if not self
._playlist
_level
:
1374 self
._playlist
_urls
.clear()
1375 elif result_type
== 'compat_list':
1376 self
.report_warning(
1377 'Extractor %s returned a compat_list result. '
1378 'It needs to be updated.' % ie_result
.get('extractor'))
1381 self
.add_extra_info(r
, {
1382 'extractor': ie_result
['extractor'],
1383 'webpage_url': ie_result
['webpage_url'],
1384 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1385 'extractor_key': ie_result
['extractor_key'],
1388 ie_result
['entries'] = [
1389 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1390 for r
in ie_result
['entries']
1394 raise Exception('Invalid result type: %s' % result_type
)
1396 def _ensure_dir_exists(self
, path
):
1397 return make_dir(path
, self
.report_error
)
1399 def __process_playlist(self
, ie_result
, download
):
1400 # We process each entry in the playlist
1401 playlist
= ie_result
.get('title') or ie_result
.get('id')
1402 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1404 if 'entries' not in ie_result
:
1405 raise EntryNotInPlaylist()
1406 incomplete_entries
= bool(ie_result
.get('requested_entries'))
1407 if incomplete_entries
:
1408 def fill_missing_entries(entries
, indexes
):
1409 ret
= [None] * max(*indexes
)
1410 for i
, entry
in zip(indexes
, entries
):
1413 ie_result
['entries'] = fill_missing_entries(ie_result
['entries'], ie_result
['requested_entries'])
1415 playlist_results
= []
1417 playliststart
= self
.params
.get('playliststart', 1)
1418 playlistend
= self
.params
.get('playlistend')
1419 # For backwards compatibility, interpret -1 as whole list
1420 if playlistend
== -1:
1423 playlistitems_str
= self
.params
.get('playlist_items')
1424 playlistitems
= None
1425 if playlistitems_str
is not None:
1426 def iter_playlistitems(format
):
1427 for string_segment
in format
.split(','):
1428 if '-' in string_segment
:
1429 start
, end
= string_segment
.split('-')
1430 for item
in range(int(start
), int(end
) + 1):
1433 yield int(string_segment
)
1434 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1436 ie_entries
= ie_result
['entries']
1438 'Downloading %d videos' if not isinstance(ie_entries
, list)
1439 else 'Collected %d videos; downloading %%d of them' % len(ie_entries
))
1441 if isinstance(ie_entries
, list):
1443 return ie_entries
[i
- 1]
1445 if not isinstance(ie_entries
, PagedList
):
1446 ie_entries
= LazyList(ie_entries
)
1449 return YoutubeDL
.__handle
_extraction
_exceptions
(
1450 lambda self
, i
: ie_entries
[i
- 1]
1454 for i
in playlistitems
or itertools
.count(playliststart
):
1455 if playlistitems
is None and playlistend
is not None and playlistend
< i
:
1459 entry
= get_entry(i
)
1461 raise EntryNotInPlaylist()
1462 except (IndexError, EntryNotInPlaylist
):
1463 if incomplete_entries
:
1464 raise EntryNotInPlaylist()
1465 elif not playlistitems
:
1467 entries
.append(entry
)
1469 if entry
is not None:
1470 self
._match
_entry
(entry
, incomplete
=True, silent
=True)
1471 except (ExistingVideoReached
, RejectedVideoReached
):
1473 ie_result
['entries'] = entries
1475 # Save playlist_index before re-ordering
1477 ((playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
- 1), entry
)
1478 for i
, entry
in enumerate(entries
, 1)
1479 if entry
is not None]
1480 n_entries
= len(entries
)
1482 if not playlistitems
and (playliststart
or playlistend
):
1483 playlistitems
= list(range(playliststart
, playliststart
+ n_entries
))
1484 ie_result
['requested_entries'] = playlistitems
1486 if self
.params
.get('allow_playlist_files', True):
1488 'playlist': playlist
,
1489 'playlist_id': ie_result
.get('id'),
1490 'playlist_title': ie_result
.get('title'),
1491 'playlist_uploader': ie_result
.get('uploader'),
1492 'playlist_uploader_id': ie_result
.get('uploader_id'),
1493 'playlist_index': 0,
1495 ie_copy
.update(dict(ie_result
))
1497 if self
.params
.get('writeinfojson', False):
1498 infofn
= self
.prepare_filename(ie_copy
, 'pl_infojson')
1499 if not self
._ensure
_dir
_exists
(encodeFilename(infofn
)):
1501 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
1502 self
.to_screen('[info] Playlist metadata is already present')
1504 self
.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn
)
1506 write_json_file(self
.sanitize_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
1507 except (OSError, IOError):
1508 self
.report_error('Cannot write playlist metadata to JSON file ' + infofn
)
1510 # TODO: This should be passed to ThumbnailsConvertor if necessary
1511 self
._write
_thumbnails
(ie_copy
, self
.prepare_filename(ie_copy
, 'pl_thumbnail'))
1513 if self
.params
.get('writedescription', False):
1514 descfn
= self
.prepare_filename(ie_copy
, 'pl_description')
1515 if not self
._ensure
_dir
_exists
(encodeFilename(descfn
)):
1517 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
1518 self
.to_screen('[info] Playlist description is already present')
1519 elif ie_result
.get('description') is None:
1520 self
.report_warning('There\'s no playlist description to write.')
1523 self
.to_screen('[info] Writing playlist description to: ' + descfn
)
1524 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1525 descfile
.write(ie_result
['description'])
1526 except (OSError, IOError):
1527 self
.report_error('Cannot write playlist description file ' + descfn
)
1530 if self
.params
.get('playlistreverse', False):
1531 entries
= entries
[::-1]
1532 if self
.params
.get('playlistrandom', False):
1533 random
.shuffle(entries
)
1535 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1537 self
.to_screen('[%s] playlist %s: %s' % (ie_result
['extractor'], playlist
, msg
% n_entries
))
1539 max_failures
= self
.params
.get('skip_playlist_after_errors') or float('inf')
1540 for i
, entry_tuple
in enumerate(entries
, 1):
1541 playlist_index
, entry
= entry_tuple
1542 if 'playlist-index' in self
.params
.get('compat_opts', []):
1543 playlist_index
= playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
- 1
1544 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1545 # This __x_forwarded_for_ip thing is a bit ugly but requires
1548 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1550 'n_entries': n_entries
,
1551 '_last_playlist_index': max(playlistitems
) if playlistitems
else (playlistend
or n_entries
),
1552 'playlist_index': playlist_index
,
1553 'playlist_autonumber': i
,
1554 'playlist': playlist
,
1555 'playlist_id': ie_result
.get('id'),
1556 'playlist_title': ie_result
.get('title'),
1557 'playlist_uploader': ie_result
.get('uploader'),
1558 'playlist_uploader_id': ie_result
.get('uploader_id'),
1559 'extractor': ie_result
['extractor'],
1560 'webpage_url': ie_result
['webpage_url'],
1561 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1562 'extractor_key': ie_result
['extractor_key'],
1565 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1568 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1569 if not entry_result
:
1571 if failures
>= max_failures
:
1573 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist
, failures
))
1575 # TODO: skip failed (empty) entries?
1576 playlist_results
.append(entry_result
)
1577 ie_result
['entries'] = playlist_results
1578 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
1581 @__handle_extraction_exceptions
1582 def __process_iterable_entry(self
, entry
, download
, extra_info
):
1583 return self
.process_ie_result(
1584 entry
, download
=download
, extra_info
=extra_info
)
1586 def _build_format_filter(self
, filter_spec
):
1587 " Returns a function to filter the formats according to the filter_spec "
1597 operator_rex
= re
.compile(r
'''(?x)\s*
1598 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1599 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1600 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1601 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1602 m
= operator_rex
.fullmatch(filter_spec
)
1605 comparison_value
= int(m
.group('value'))
1607 comparison_value
= parse_filesize(m
.group('value'))
1608 if comparison_value
is None:
1609 comparison_value
= parse_filesize(m
.group('value') + 'B')
1610 if comparison_value
is None:
1612 'Invalid value %r in format specification %r' % (
1613 m
.group('value'), filter_spec
))
1614 op
= OPERATORS
[m
.group('op')]
1619 '^=': lambda attr
, value
: attr
.startswith(value
),
1620 '$=': lambda attr
, value
: attr
.endswith(value
),
1621 '*=': lambda attr
, value
: value
in attr
,
1623 str_operator_rex
= re
.compile(r
'''(?x)\s*
1624 (?P<key>[a-zA-Z0-9._-]+)\s*
1625 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1626 (?P<value>[a-zA-Z0-9._-]+)\s*
1627 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1628 m
= str_operator_rex
.fullmatch(filter_spec
)
1630 comparison_value
= m
.group('value')
1631 str_op
= STR_OPERATORS
[m
.group('op')]
1632 if m
.group('negation'):
1633 op
= lambda attr
, value
: not str_op(attr
, value
)
1638 raise SyntaxError('Invalid filter specification %r' % filter_spec
)
1641 actual_value
= f
.get(m
.group('key'))
1642 if actual_value
is None:
1643 return m
.group('none_inclusive')
1644 return op(actual_value
, comparison_value
)
1647 def _default_format_spec(self
, info_dict
, download
=True):
1650 merger
= FFmpegMergerPP(self
)
1651 return merger
.available
and merger
.can_merge()
1654 not self
.params
.get('simulate')
1658 or info_dict
.get('is_live', False)
1659 or self
.outtmpl_dict
['default'] == '-'))
1662 or self
.params
.get('allow_multiple_audio_streams', False)
1663 or 'format-spec' in self
.params
.get('compat_opts', []))
1666 'best/bestvideo+bestaudio' if prefer_best
1667 else 'bestvideo*+bestaudio/best' if not compat
1668 else 'bestvideo+bestaudio/best')
1670 def build_format_selector(self
, format_spec
):
1671 def syntax_error(note
, start
):
1673 'Invalid format specification: '
1674 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1675 return SyntaxError(message
)
1677 PICKFIRST
= 'PICKFIRST'
1681 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1683 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1684 'video': self
.params
.get('allow_multiple_video_streams', False)}
1686 check_formats
= self
.params
.get('check_formats')
1688 def _parse_filter(tokens
):
1690 for type, string
, start
, _
, _
in tokens
:
1691 if type == tokenize
.OP
and string
== ']':
1692 return ''.join(filter_parts
)
1694 filter_parts
.append(string
)
1696 def _remove_unused_ops(tokens
):
1697 # Remove operators that we don't use and join them with the surrounding strings
1698 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1699 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1700 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1701 for type, string
, start
, end
, line
in tokens
:
1702 if type == tokenize
.OP
and string
== '[':
1704 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1706 yield type, string
, start
, end
, line
1707 # everything inside brackets will be handled by _parse_filter
1708 for type, string
, start
, end
, line
in tokens
:
1709 yield type, string
, start
, end
, line
1710 if type == tokenize
.OP
and string
== ']':
1712 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1714 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1716 yield type, string
, start
, end
, line
1717 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1719 last_string
= string
1723 last_string
+= string
1725 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1727 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1729 current_selector
= None
1730 for type, string
, start
, _
, _
in tokens
:
1731 # ENCODING is only defined in python 3.x
1732 if type == getattr(tokenize
, 'ENCODING', None):
1734 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1735 current_selector
= FormatSelector(SINGLE
, string
, [])
1736 elif type == tokenize
.OP
:
1738 if not inside_group
:
1739 # ')' will be handled by the parentheses group
1740 tokens
.restore_last_token()
1742 elif inside_merge
and string
in ['/', ',']:
1743 tokens
.restore_last_token()
1745 elif inside_choice
and string
== ',':
1746 tokens
.restore_last_token()
1749 if not current_selector
:
1750 raise syntax_error('"," must follow a format selector', start
)
1751 selectors
.append(current_selector
)
1752 current_selector
= None
1754 if not current_selector
:
1755 raise syntax_error('"/" must follow a format selector', start
)
1756 first_choice
= current_selector
1757 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1758 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1760 if not current_selector
:
1761 current_selector
= FormatSelector(SINGLE
, 'best', [])
1762 format_filter
= _parse_filter(tokens
)
1763 current_selector
.filters
.append(format_filter
)
1765 if current_selector
:
1766 raise syntax_error('Unexpected "("', start
)
1767 group
= _parse_format_selection(tokens
, inside_group
=True)
1768 current_selector
= FormatSelector(GROUP
, group
, [])
1770 if not current_selector
:
1771 raise syntax_error('Unexpected "+"', start
)
1772 selector_1
= current_selector
1773 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1775 raise syntax_error('Expected a selector', start
)
1776 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1778 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1779 elif type == tokenize
.ENDMARKER
:
1781 if current_selector
:
1782 selectors
.append(current_selector
)
1785 def _merge(formats_pair
):
1786 format_1
, format_2
= formats_pair
1789 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1790 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1792 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1793 get_no_more
= {'video': False, 'audio': False}
1794 for (i
, fmt_info
) in enumerate(formats_info
):
1795 if fmt_info
.get('acodec') == fmt_info
.get('vcodec') == 'none':
1798 for aud_vid
in ['audio', 'video']:
1799 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1800 if get_no_more
[aud_vid
]:
1803 get_no_more
[aud_vid
] = True
1805 if len(formats_info
) == 1:
1806 return formats_info
[0]
1808 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1809 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1811 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1812 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1814 output_ext
= self
.params
.get('merge_output_format')
1817 output_ext
= the_only_video
['ext']
1818 elif the_only_audio
and not video_fmts
:
1819 output_ext
= the_only_audio
['ext']
1824 'requested_formats': formats_info
,
1825 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1826 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1832 'width': the_only_video
.get('width'),
1833 'height': the_only_video
.get('height'),
1834 'resolution': the_only_video
.get('resolution') or self
.format_resolution(the_only_video
),
1835 'fps': the_only_video
.get('fps'),
1836 'vcodec': the_only_video
.get('vcodec'),
1837 'vbr': the_only_video
.get('vbr'),
1838 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1843 'acodec': the_only_audio
.get('acodec'),
1844 'abr': the_only_audio
.get('abr'),
1849 def _check_formats(formats
):
1850 if not check_formats
:
1854 self
.to_screen('[info] Testing format %s' % f
['format_id'])
1855 temp_file
= tempfile
.NamedTemporaryFile(
1856 suffix
='.tmp', delete
=False,
1857 dir=self
.get_output_path('temp') or None)
1860 success
, _
= self
.dl(temp_file
.name
, f
, test
=True)
1861 except (DownloadError
, IOError, OSError, ValueError) + network_exceptions
:
1864 if os
.path
.exists(temp_file
.name
):
1866 os
.remove(temp_file
.name
)
1868 self
.report_warning('Unable to delete temporary file "%s"' % temp_file
.name
)
1872 self
.to_screen('[info] Unable to download format %s. Skipping...' % f
['format_id'])
1874 def _build_selector_function(selector
):
1875 if isinstance(selector
, list): # ,
1876 fs
= [_build_selector_function(s
) for s
in selector
]
1878 def selector_function(ctx
):
1881 return selector_function
1883 elif selector
.type == GROUP
: # ()
1884 selector_function
= _build_selector_function(selector
.selector
)
1886 elif selector
.type == PICKFIRST
: # /
1887 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1889 def selector_function(ctx
):
1891 picked_formats
= list(f(ctx
))
1893 return picked_formats
1896 elif selector
.type == MERGE
: # +
1897 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1899 def selector_function(ctx
):
1900 for pair
in itertools
.product(
1901 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1904 elif selector
.type == SINGLE
: # atom
1905 format_spec
= selector
.selector
or 'best'
1907 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1908 if format_spec
== 'all':
1909 def selector_function(ctx
):
1910 yield from _check_formats(ctx
['formats'])
1911 elif format_spec
== 'mergeall':
1912 def selector_function(ctx
):
1913 formats
= list(_check_formats(ctx
['formats']))
1916 merged_format
= formats
[-1]
1917 for f
in formats
[-2::-1]:
1918 merged_format
= _merge((merged_format
, f
))
1922 format_fallback
, format_reverse
, format_idx
= False, True, 1
1924 r
'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1926 if mobj
is not None:
1927 format_idx
= int_or_none(mobj
.group('n'), default
=1)
1928 format_reverse
= mobj
.group('bw')[0] == 'b'
1929 format_type
= (mobj
.group('type') or [None])[0]
1930 not_format_type
= {'v': 'a', 'a': 'v'}
.get(format_type
)
1931 format_modified
= mobj
.group('mod') is not None
1933 format_fallback
= not format_type
and not format_modified
# for b, w
1935 (lambda f
: f
.get('%scodec' % format_type
) != 'none')
1936 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1937 else (lambda f
: f
.get('%scodec' % not_format_type
) == 'none')
1938 if format_type
# bv, ba, wv, wa
1939 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1940 if not format_modified
# b, w
1941 else lambda f
: True) # b*, w*
1942 filter_f
= lambda f
: _filter_f(f
) and (
1943 f
.get('vcodec') != 'none' or f
.get('acodec') != 'none')
1945 filter_f
= ((lambda f
: f
.get('ext') == format_spec
)
1946 if format_spec
in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1947 else (lambda f
: f
.get('format_id') == format_spec
)) # id
1949 def selector_function(ctx
):
1950 formats
= list(ctx
['formats'])
1951 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
1952 if format_fallback
and ctx
['incomplete_formats'] and not matches
:
1953 # for extractors with incomplete formats (audio only (soundcloud)
1954 # or video only (imgur)) best/worst will fallback to
1955 # best/worst {video,audio}-only format
1957 matches
= LazyList(_check_formats(matches
[::-1 if format_reverse
else 1]))
1959 yield matches
[format_idx
- 1]
1963 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1965 def final_selector(ctx
):
1966 ctx_copy
= copy
.deepcopy(ctx
)
1967 for _filter
in filters
:
1968 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
1969 return selector_function(ctx_copy
)
1970 return final_selector
1972 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1974 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1975 except tokenize
.TokenError
:
1976 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1978 class TokenIterator(object):
1979 def __init__(self
, tokens
):
1980 self
.tokens
= tokens
1987 if self
.counter
>= len(self
.tokens
):
1988 raise StopIteration()
1989 value
= self
.tokens
[self
.counter
]
1995 def restore_last_token(self
):
1998 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1999 return _build_selector_function(parsed_selector
)
2001 def _calc_headers(self
, info_dict
):
2002 res
= std_headers
.copy()
2004 add_headers
= info_dict
.get('http_headers')
2006 res
.update(add_headers
)
2008 cookies
= self
._calc
_cookies
(info_dict
)
2010 res
['Cookie'] = cookies
2012 if 'X-Forwarded-For' not in res
:
2013 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
2014 if x_forwarded_for_ip
:
2015 res
['X-Forwarded-For'] = x_forwarded_for_ip
2019 def _calc_cookies(self
, info_dict
):
2020 pr
= sanitized_Request(info_dict
['url'])
2021 self
.cookiejar
.add_cookie_header(pr
)
2022 return pr
.get_header('Cookie')
2024 def _sanitize_thumbnails(self
, info_dict
):
2025 thumbnails
= info_dict
.get('thumbnails')
2026 if thumbnails
is None:
2027 thumbnail
= info_dict
.get('thumbnail')
2029 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
2031 thumbnails
.sort(key
=lambda t
: (
2032 t
.get('preference') if t
.get('preference') is not None else -1,
2033 t
.get('width') if t
.get('width') is not None else -1,
2034 t
.get('height') if t
.get('height') is not None else -1,
2035 t
.get('id') if t
.get('id') is not None else '',
2038 def thumbnail_tester():
2039 if self
.params
.get('check_formats'):
2041 to_screen
= lambda msg
: self
.to_screen(f
'[info] {msg}')
2044 to_screen
= self
.write_debug
2046 def test_thumbnail(t
):
2047 if not test_all
and not t
.get('_test_url'):
2049 to_screen('Testing thumbnail %s' % t
['id'])
2051 self
.urlopen(HEADRequest(t
['url']))
2052 except network_exceptions
as err
:
2053 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2054 t
['id'], t
['url'], error_to_compat_str(err
)))
2058 return test_thumbnail
2060 for i
, t
in enumerate(thumbnails
):
2061 if t
.get('id') is None:
2063 if t
.get('width') and t
.get('height'):
2064 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
2065 t
['url'] = sanitize_url(t
['url'])
2067 if self
.params
.get('check_formats') is not False:
2068 info_dict
['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails
[::-1])).reverse()
2070 info_dict
['thumbnails'] = thumbnails
2072 def process_video_result(self
, info_dict
, download
=True):
2073 assert info_dict
.get('_type', 'video') == 'video'
2075 if 'id' not in info_dict
:
2076 raise ExtractorError('Missing "id" field in extractor result')
2077 if 'title' not in info_dict
:
2078 raise ExtractorError('Missing "title" field in extractor result',
2079 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2081 def report_force_conversion(field
, field_not
, conversion
):
2082 self
.report_warning(
2083 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2084 % (field
, field_not
, conversion
))
2086 def sanitize_string_field(info
, string_field
):
2087 field
= info
.get(string_field
)
2088 if field
is None or isinstance(field
, compat_str
):
2090 report_force_conversion(string_field
, 'a string', 'string')
2091 info
[string_field
] = compat_str(field
)
2093 def sanitize_numeric_fields(info
):
2094 for numeric_field
in self
._NUMERIC
_FIELDS
:
2095 field
= info
.get(numeric_field
)
2096 if field
is None or isinstance(field
, compat_numeric_types
):
2098 report_force_conversion(numeric_field
, 'numeric', 'int')
2099 info
[numeric_field
] = int_or_none(field
)
2101 sanitize_string_field(info_dict
, 'id')
2102 sanitize_numeric_fields(info_dict
)
2104 if 'playlist' not in info_dict
:
2105 # It isn't part of a playlist
2106 info_dict
['playlist'] = None
2107 info_dict
['playlist_index'] = None
2109 self
._sanitize
_thumbnails
(info_dict
)
2111 thumbnail
= info_dict
.get('thumbnail')
2112 thumbnails
= info_dict
.get('thumbnails')
2114 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2116 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2118 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2119 info_dict
['display_id'] = info_dict
['id']
2121 for ts_key
, date_key
in (
2122 ('timestamp', 'upload_date'),
2123 ('release_timestamp', 'release_date'),
2125 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2126 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2127 # see http://bugs.python.org/issue1646728)
2129 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
2130 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2131 except (ValueError, OverflowError, OSError):
2134 live_keys
= ('is_live', 'was_live')
2135 live_status
= info_dict
.get('live_status')
2136 if live_status
is None:
2137 for key
in live_keys
:
2138 if info_dict
.get(key
) is False:
2140 if info_dict
.get(key
):
2143 if all(info_dict
.get(key
) is False for key
in live_keys
):
2144 live_status
= 'not_live'
2146 info_dict
['live_status'] = live_status
2147 for key
in live_keys
:
2148 if info_dict
.get(key
) is None:
2149 info_dict
[key
] = (live_status
== key
)
2151 # Auto generate title fields corresponding to the *_number fields when missing
2152 # in order to always have clean titles. This is very common for TV series.
2153 for field
in ('chapter', 'season', 'episode'):
2154 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2155 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
2157 for cc_kind
in ('subtitles', 'automatic_captions'):
2158 cc
= info_dict
.get(cc_kind
)
2160 for _
, subtitle
in cc
.items():
2161 for subtitle_format
in subtitle
:
2162 if subtitle_format
.get('url'):
2163 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2164 if subtitle_format
.get('ext') is None:
2165 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2167 automatic_captions
= info_dict
.get('automatic_captions')
2168 subtitles
= info_dict
.get('subtitles')
2170 info_dict
['requested_subtitles'] = self
.process_subtitles(
2171 info_dict
['id'], subtitles
, automatic_captions
)
2173 # We now pick which formats have to be downloaded
2174 if info_dict
.get('formats') is None:
2175 # There's only one format available
2176 formats
= [info_dict
]
2178 formats
= info_dict
['formats']
2180 info_dict
['__has_drm'] = any(f
.get('has_drm') for f
in formats
)
2181 if not self
.params
.get('allow_unplayable_formats'):
2182 formats
= [f
for f
in formats
if not f
.get('has_drm')]
2185 self
.raise_no_formats(info_dict
)
2187 def is_wellformed(f
):
2190 self
.report_warning(
2191 '"url" field is missing or empty - skipping format, '
2192 'there is an error in extractor')
2194 if isinstance(url
, bytes):
2195 sanitize_string_field(f
, 'url')
2198 # Filter out malformed formats for better extraction robustness
2199 formats
= list(filter(is_wellformed
, formats
))
2203 # We check that all the formats have the format and format_id fields
2204 for i
, format
in enumerate(formats
):
2205 sanitize_string_field(format
, 'format_id')
2206 sanitize_numeric_fields(format
)
2207 format
['url'] = sanitize_url(format
['url'])
2208 if not format
.get('format_id'):
2209 format
['format_id'] = compat_str(i
)
2211 # Sanitize format_id from characters used in format selector expression
2212 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2213 format_id
= format
['format_id']
2214 if format_id
not in formats_dict
:
2215 formats_dict
[format_id
] = []
2216 formats_dict
[format_id
].append(format
)
2218 # Make sure all formats have unique format_id
2219 for format_id
, ambiguous_formats
in formats_dict
.items():
2220 if len(ambiguous_formats
) > 1:
2221 for i
, format
in enumerate(ambiguous_formats
):
2222 format
['format_id'] = '%s-%d' % (format_id
, i
)
2224 for i
, format
in enumerate(formats
):
2225 if format
.get('format') is None:
2226 format
['format'] = '{id} - {res}{note}'.format(
2227 id=format
['format_id'],
2228 res
=self
.format_resolution(format
),
2229 note
=format_field(format
, 'format_note', ' (%s)'),
2231 # Automatically determine file extension if missing
2232 if format
.get('ext') is None:
2233 format
['ext'] = determine_ext(format
['url']).lower()
2234 # Automatically determine protocol if missing (useful for format
2235 # selection purposes)
2236 if format
.get('protocol') is None:
2237 format
['protocol'] = determine_protocol(format
)
2238 # Add HTTP headers, so that external programs can use them from the
2240 full_format_info
= info_dict
.copy()
2241 full_format_info
.update(format
)
2242 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2243 # Remove private housekeeping stuff
2244 if '__x_forwarded_for_ip' in info_dict
:
2245 del info_dict
['__x_forwarded_for_ip']
2247 # TODO Central sorting goes here
2249 if not formats
or formats
[0] is not info_dict
:
2250 # only set the 'formats' fields if the original info_dict list them
2251 # otherwise we end up with a circular reference, the first (and unique)
2252 # element in the 'formats' field in info_dict is info_dict itself,
2253 # which can't be exported to json
2254 info_dict
['formats'] = formats
2256 info_dict
, _
= self
.pre_process(info_dict
)
2258 if self
.params
.get('list_thumbnails'):
2259 self
.list_thumbnails(info_dict
)
2260 if self
.params
.get('listformats'):
2261 if not info_dict
.get('formats') and not info_dict
.get('url'):
2262 self
.to_screen('%s has no formats' % info_dict
['id'])
2264 self
.list_formats(info_dict
)
2265 if self
.params
.get('listsubtitles'):
2266 if 'automatic_captions' in info_dict
:
2267 self
.list_subtitles(
2268 info_dict
['id'], automatic_captions
, 'automatic captions')
2269 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2270 list_only
= self
.params
.get('simulate') is None and (
2271 self
.params
.get('list_thumbnails') or self
.params
.get('listformats') or self
.params
.get('listsubtitles'))
2273 # Without this printing, -F --print-json will not work
2274 self
.__forced
_printings
(info_dict
, self
.prepare_filename(info_dict
), incomplete
=True)
2277 format_selector
= self
.format_selector
2278 if format_selector
is None:
2279 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2280 self
.write_debug('Default format spec: %s' % req_format
)
2281 format_selector
= self
.build_format_selector(req_format
)
2283 # While in format selection we may need to have an access to the original
2284 # format set in order to calculate some metrics or do some processing.
2285 # For now we need to be able to guess whether original formats provided
2286 # by extractor are incomplete or not (i.e. whether extractor provides only
2287 # video-only or audio-only formats) for proper formats selection for
2288 # extractors with such incomplete formats (see
2289 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2290 # Since formats may be filtered during format selection and may not match
2291 # the original formats the results may be incorrect. Thus original formats
2292 # or pre-calculated metrics should be passed to format selection routines
2294 # We will pass a context object containing all necessary additional data
2295 # instead of just formats.
2296 # This fixes incorrect format selection issue (see
2297 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2298 incomplete_formats
= (
2299 # All formats are video-only or
2300 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2301 # all formats are audio-only
2302 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
2306 'incomplete_formats': incomplete_formats
,
2309 formats_to_download
= list(format_selector(ctx
))
2310 if not formats_to_download
:
2311 if not self
.params
.get('ignore_no_formats_error'):
2312 raise ExtractorError('Requested format is not available', expected
=True,
2313 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2315 self
.report_warning('Requested format is not available')
2316 # Process what we can, even without any available formats.
2317 self
.process_info(dict(info_dict
))
2320 '[info] %s: Downloading %d format(s): %s' % (
2321 info_dict
['id'], len(formats_to_download
),
2322 ", ".join([f
['format_id'] for f
in formats_to_download
])))
2323 for fmt
in formats_to_download
:
2324 new_info
= dict(info_dict
)
2325 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2326 new_info
['__original_infodict'] = info_dict
2327 new_info
.update(fmt
)
2328 self
.process_info(new_info
)
2329 # We update the info dict with the best quality format (backwards compatibility)
2330 if formats_to_download
:
2331 info_dict
.update(formats_to_download
[-1])
2334 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
2335 """Select the requested subtitles and their format"""
2337 if normal_subtitles
and self
.params
.get('writesubtitles'):
2338 available_subs
.update(normal_subtitles
)
2339 if automatic_captions
and self
.params
.get('writeautomaticsub'):
2340 for lang
, cap_info
in automatic_captions
.items():
2341 if lang
not in available_subs
:
2342 available_subs
[lang
] = cap_info
2344 if (not self
.params
.get('writesubtitles') and not
2345 self
.params
.get('writeautomaticsub') or not
2349 all_sub_langs
= available_subs
.keys()
2350 if self
.params
.get('allsubtitles', False):
2351 requested_langs
= all_sub_langs
2352 elif self
.params
.get('subtitleslangs', False):
2353 requested_langs
= set()
2354 for lang
in self
.params
.get('subtitleslangs'):
2356 requested_langs
.update(all_sub_langs
)
2358 discard
= lang
[0] == '-'
2361 current_langs
= filter(re
.compile(lang
+ '$').match
, all_sub_langs
)
2363 for lang
in current_langs
:
2364 requested_langs
.discard(lang
)
2366 requested_langs
.update(current_langs
)
2367 elif 'en' in available_subs
:
2368 requested_langs
= ['en']
2370 requested_langs
= [list(all_sub_langs
)[0]]
2372 self
.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs
))
2374 formats_query
= self
.params
.get('subtitlesformat', 'best')
2375 formats_preference
= formats_query
.split('/') if formats_query
else []
2377 for lang
in requested_langs
:
2378 formats
= available_subs
.get(lang
)
2380 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
2382 for ext
in formats_preference
:
2386 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
2392 self
.report_warning(
2393 'No subtitle format found matching "%s" for language %s, '
2394 'using %s' % (formats_query
, lang
, f
['ext']))
    def __forced_printings(self, info_dict, filename, incomplete):
        """Print the fields of info_dict requested by the various 'force*'
        params (--print/--get-*/--dump-json style options) to stdout.

        `incomplete` signals that info_dict may not yet carry all fields
        (e.g. before format selection), in which case mandatory fields are
        only printed when actually present.
        """
        def print_mandatory(field, actual_field=None):
            # Print whenever 'force<field>' is set; when the dict is still
            # incomplete, additionally require the field to be present.
            if actual_field is None:
                actual_field = field
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(actual_field) is not None)):
                self.to_stdout(info_dict[actual_field])

        def print_optional(field):
            # Print only when 'force<field>' is set AND the field is present.
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        # Work on a copy: the 'filename'/'urls' keys below exist purely for
        # printing and must not leak back to the caller's dict.
        info_dict = info_dict.copy()
        if filename is not None:
            info_dict['filename'] = filename
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif 'url' in info_dict:
            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

        if self.params.get('forceprint') or self.params.get('forcejson'):
            self.post_extract(info_dict)
        for tmpl in self.params.get('forceprint', []):
            # A bare field name is shorthand for the '%(field)s' template.
            if re.match(r'\w+$', tmpl):
                tmpl = '%({})s'.format(tmpl)
            tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
            self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)

        print_mandatory('title')
        print_mandatory('id')
        print_mandatory('url', 'urls')
        print_optional('thumbnail')
        print_optional('description')
        print_optional('filename')
        if self.params.get('forceduration') and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2441 def dl(self
, name
, info
, subtitle
=False, test
=False):
2442 if not info
.get('url'):
2443 self
.raise_no_formats(info
, True)
2446 verbose
= self
.params
.get('verbose')
2449 'quiet': not verbose
,
2451 'noprogress': not verbose
,
2453 'skip_unavailable_fragments': False,
2454 'keep_fragments': False,
2456 '_no_ytdl_file': True,
2459 params
= self
.params
2460 fd
= get_suitable_downloader(info
, params
, to_stdout
=(name
== '-'))(self
, params
)
2462 for ph
in self
._progress
_hooks
:
2463 fd
.add_progress_hook(ph
)
2464 urls
= '", "'.join([f
['url'] for f
in info
.get('requested_formats', [])] or [info
['url']])
2465 self
.write_debug('Invoking downloader on "%s"' % urls
)
2466 new_info
= dict(info
)
2467 if new_info
.get('http_headers') is None:
2468 new_info
['http_headers'] = self
._calc
_headers
(new_info
)
2469 return fd
.download(name
, new_info
, subtitle
)
2471 def process_info(self
, info_dict
):
2472 """Process a single resolved IE result."""
2474 assert info_dict
.get('_type', 'video') == 'video'
2476 max_downloads
= self
.params
.get('max_downloads')
2477 if max_downloads
is not None:
2478 if self
._num
_downloads
>= int(max_downloads
):
2479 raise MaxDownloadsReached()
2481 # TODO: backward compatibility, to be removed
2482 info_dict
['fulltitle'] = info_dict
['title']
2484 if 'format' not in info_dict
and 'ext' in info_dict
:
2485 info_dict
['format'] = info_dict
['ext']
2487 if self
._match
_entry
(info_dict
) is not None:
2490 self
.post_extract(info_dict
)
2491 self
._num
_downloads
+= 1
2493 # info_dict['_filename'] needs to be set for backward compatibility
2494 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2495 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2499 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
2501 if self
.params
.get('simulate'):
2502 if self
.params
.get('force_write_download_archive', False):
2503 self
.record_download_archive(info_dict
)
2505 # Do nothing else if in simulate mode
2508 if full_filename
is None:
2511 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
2513 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
2516 if self
.params
.get('writedescription', False):
2517 descfn
= self
.prepare_filename(info_dict
, 'description')
2518 if not self
._ensure
_dir
_exists
(encodeFilename(descfn
)):
2520 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
2521 self
.to_screen('[info] Video description is already present')
2522 elif info_dict
.get('description') is None:
2523 self
.report_warning('There\'s no description to write.')
2526 self
.to_screen('[info] Writing video description to: ' + descfn
)
2527 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
2528 descfile
.write(info_dict
['description'])
2529 except (OSError, IOError):
2530 self
.report_error('Cannot write description file ' + descfn
)
2533 if self
.params
.get('writeannotations', False):
2534 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2535 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
2537 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2538 self
.to_screen('[info] Video annotations are already present')
2539 elif not info_dict
.get('annotations'):
2540 self
.report_warning('There are no annotations to write.')
2543 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2544 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2545 annofile
.write(info_dict
['annotations'])
2546 except (KeyError, TypeError):
2547 self
.report_warning('There are no annotations to write.')
2548 except (OSError, IOError):
2549 self
.report_error('Cannot write annotations file: ' + annofn
)
2552 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
2553 self
.params
.get('writeautomaticsub')])
2555 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
2556 # subtitles download errors are already managed as troubles in relevant IE
2557 # that way it will silently go on when used with unsupporting IE
2558 subtitles
= info_dict
['requested_subtitles']
2559 # ie = self.get_info_extractor(info_dict['extractor_key'])
2560 for sub_lang
, sub_info
in subtitles
.items():
2561 sub_format
= sub_info
['ext']
2562 sub_filename
= subtitles_filename(temp_filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
2563 sub_filename_final
= subtitles_filename(
2564 self
.prepare_filename(info_dict
, 'subtitle'), sub_lang
, sub_format
, info_dict
.get('ext'))
2565 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
2566 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
2567 sub_info
['filepath'] = sub_filename
2568 files_to_move
[sub_filename
] = sub_filename_final
2570 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
2571 if sub_info
.get('data') is not None:
2573 # Use newline='' to prevent conversion of newline characters
2574 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2575 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
2576 subfile
.write(sub_info
['data'])
2577 sub_info
['filepath'] = sub_filename
2578 files_to_move
[sub_filename
] = sub_filename_final
2579 except (OSError, IOError):
2580 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2584 self
.dl(sub_filename
, sub_info
.copy(), subtitle
=True)
2585 sub_info
['filepath'] = sub_filename
2586 files_to_move
[sub_filename
] = sub_filename_final
2587 except (ExtractorError
, IOError, OSError, ValueError) + network_exceptions
as err
:
2588 self
.report_warning('Unable to download subtitle for "%s": %s' %
2589 (sub_lang
, error_to_compat_str(err
)))
2592 if self
.params
.get('writeinfojson', False):
2593 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2594 if not self
._ensure
_dir
_exists
(encodeFilename(infofn
)):
2596 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2597 self
.to_screen('[info] Video metadata is already present')
2599 self
.to_screen('[info] Writing video metadata as JSON to: ' + infofn
)
2601 write_json_file(self
.sanitize_info(info_dict
, self
.params
.get('clean_infojson', True)), infofn
)
2602 except (OSError, IOError):
2603 self
.report_error('Cannot write video metadata to JSON file ' + infofn
)
2605 info_dict
['__infojson_filename'] = infofn
2607 for thumb_ext
in self
._write
_thumbnails
(info_dict
, temp_filename
):
2608 thumb_filename_temp
= replace_extension(temp_filename
, thumb_ext
, info_dict
.get('ext'))
2609 thumb_filename
= replace_extension(
2610 self
.prepare_filename(info_dict
, 'thumbnail'), thumb_ext
, info_dict
.get('ext'))
2611 files_to_move
[thumb_filename_temp
] = thumb_filename
2613 # Write internet shortcut files
2614 url_link
= webloc_link
= desktop_link
= False
2615 if self
.params
.get('writelink', False):
2616 if sys
.platform
== "darwin": # macOS.
2618 elif sys
.platform
.startswith("linux"):
2620 else: # if sys.platform in ['win32', 'cygwin']:
2622 if self
.params
.get('writeurllink', False):
2624 if self
.params
.get('writewebloclink', False):
2626 if self
.params
.get('writedesktoplink', False):
2629 if url_link
or webloc_link
or desktop_link
:
2630 if 'webpage_url' not in info_dict
:
2631 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2633 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2635 def _write_link_file(extension
, template
, newline
, embed_filename
):
2636 linkfn
= replace_extension(full_filename
, extension
, info_dict
.get('ext'))
2637 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2638 self
.to_screen('[info] Internet shortcut is already present')
2641 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2642 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2643 template_vars
= {'url': ascii_url}
2645 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2646 linkfile
.write(template
% template_vars
)
2647 except (OSError, IOError):
2648 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2653 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2656 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2659 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2663 info_dict
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
2664 except PostProcessingError
as err
:
2665 self
.report_error('Preprocessing: %s' % str(err
))
2668 must_record_download_archive
= False
2669 if self
.params
.get('skip_download', False):
2670 info_dict
['filepath'] = temp_filename
2671 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2672 info_dict
['__files_to_move'] = files_to_move
2673 info_dict
= self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
)
2676 info_dict
.setdefault('__postprocessors', [])
2679 def existing_file(*filepaths
):
2680 ext
= info_dict
.get('ext')
2681 final_ext
= self
.params
.get('final_ext', ext
)
2683 for file in orderedSet(filepaths
):
2684 if final_ext
!= ext
:
2685 converted
= replace_extension(file, final_ext
, ext
)
2686 if os
.path
.exists(encodeFilename(converted
)):
2687 existing_files
.append(converted
)
2688 if os
.path
.exists(encodeFilename(file)):
2689 existing_files
.append(file)
2691 if not existing_files
or self
.params
.get('overwrites', False):
2692 for file in orderedSet(existing_files
):
2693 self
.report_file_delete(file)
2694 os
.remove(encodeFilename(file))
2697 info_dict
['ext'] = os
.path
.splitext(existing_files
[0])[1][1:]
2698 return existing_files
[0]
2701 if info_dict
.get('requested_formats') is not None:
2703 def compatible_formats(formats
):
2704 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2705 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2706 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2707 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2711 exts
= set(format
.get('ext') for format
in formats
)
2713 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2716 for ext_sets
in COMPATIBLE_EXTS
:
2717 if ext_sets
.issuperset(exts
):
2719 # TODO: Check acodec/vcodec
2722 requested_formats
= info_dict
['requested_formats']
2723 old_ext
= info_dict
['ext']
2724 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2725 info_dict
['ext'] = 'mkv'
2726 self
.report_warning(
2727 'Requested formats are incompatible for merge and will be merged into mkv.')
2728 new_ext
= info_dict
['ext']
2730 def correct_ext(filename
, ext
=new_ext
):
2733 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2735 os
.path
.splitext(filename
)[0]
2736 if filename_real_ext
in (old_ext
, new_ext
)
2738 return '%s.%s' % (filename_wo_ext
, ext
)
2740 # Ensure filename always has a correct extension for successful merge
2741 full_filename
= correct_ext(full_filename
)
2742 temp_filename
= correct_ext(temp_filename
)
2743 dl_filename
= existing_file(full_filename
, temp_filename
)
2744 info_dict
['__real_download'] = False
2746 _protocols
= set(determine_protocol(f
) for f
in requested_formats
)
2747 if len(_protocols
) == 1: # All requested formats have same protocol
2748 info_dict
['protocol'] = _protocols
.pop()
2749 directly_mergable
= FFmpegFD
.can_merge_formats(info_dict
)
2750 if dl_filename
is not None:
2751 self
.report_file_already_downloaded(dl_filename
)
2752 elif (directly_mergable
and get_suitable_downloader(
2753 info_dict
, self
.params
, to_stdout
=(temp_filename
== '-')) == FFmpegFD
):
2754 info_dict
['url'] = '\n'.join(f
['url'] for f
in requested_formats
)
2755 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2756 info_dict
['__real_download'] = real_download
2759 merger
= FFmpegMergerPP(self
)
2760 if self
.params
.get('allow_unplayable_formats'):
2761 self
.report_warning(
2762 'You have requested merging of multiple formats '
2763 'while also allowing unplayable formats to be downloaded. '
2764 'The formats won\'t be merged to prevent data corruption.')
2765 elif not merger
.available
:
2766 self
.report_warning(
2767 'You have requested merging of multiple formats but ffmpeg is not installed. '
2768 'The formats won\'t be merged.')
2770 if temp_filename
== '-':
2771 reason
= ('using a downloader other than ffmpeg' if directly_mergable
2772 else 'but the formats are incompatible for simultaneous download' if merger
.available
2773 else 'but ffmpeg is not installed')
2774 self
.report_warning(
2775 f
'You have requested downloading multiple formats to stdout {reason}. '
2776 'The formats will be streamed one after the other')
2777 fname
= temp_filename
2778 for f
in requested_formats
:
2779 new_info
= dict(info_dict
)
2780 del new_info
['requested_formats']
2782 if temp_filename
!= '-':
2783 fname
= prepend_extension(
2784 correct_ext(temp_filename
, new_info
['ext']),
2785 'f%s' % f
['format_id'], new_info
['ext'])
2786 if not self
._ensure
_dir
_exists
(fname
):
2788 downloaded
.append(fname
)
2789 partial_success
, real_download
= self
.dl(fname
, new_info
)
2790 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
2791 success
= success
and partial_success
2792 if merger
.available
and not self
.params
.get('allow_unplayable_formats'):
2793 info_dict
['__postprocessors'].append(merger
)
2794 info_dict
['__files_to_merge'] = downloaded
2795 # Even if there were no downloads, it is being merged only now
2796 info_dict
['__real_download'] = True
2798 for file in downloaded
:
2799 files_to_move
[file] = None
2801 # Just a single file
2802 dl_filename
= existing_file(full_filename
, temp_filename
)
2803 if dl_filename
is None or dl_filename
== temp_filename
:
2804 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2805 # So we should try to resume the download
2806 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2807 info_dict
['__real_download'] = real_download
2809 self
.report_file_already_downloaded(dl_filename
)
2811 dl_filename
= dl_filename
or temp_filename
2812 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2814 except network_exceptions
as err
:
2815 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2817 except (OSError, IOError) as err
:
2818 raise UnavailableVideoError(err
)
2819 except (ContentTooShortError
, ) as err
:
2820 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2823 if success
and full_filename
!= '-':
2827 fixup_policy
= self
.params
.get('fixup')
2828 vid
= info_dict
['id']
2830 if fixup_policy
in ('ignore', 'never'):
2832 elif fixup_policy
== 'warn':
2834 elif fixup_policy
!= 'force':
2835 assert fixup_policy
in ('detect_or_warn', None)
2836 if not info_dict
.get('__real_download'):
2839 def ffmpeg_fixup(cndn
, msg
, cls
):
2843 self
.report_warning(f
'{vid}: {msg}')
2847 info_dict
['__postprocessors'].append(pp
)
2849 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
2851 stretched_ratio
= info_dict
.get('stretched_ratio')
2853 stretched_ratio
not in (1, None),
2854 f
'Non-uniform pixel ratio {stretched_ratio}',
2855 FFmpegFixupStretchedPP
)
2858 (info_dict
.get('requested_formats') is None
2859 and info_dict
.get('container') == 'm4a_dash'
2860 and info_dict
.get('ext') == 'm4a'),
2861 'writing DASH m4a. Only some players support this container',
2864 downloader
= (get_suitable_downloader(info_dict
, self
.params
).__name
__
2865 if 'protocol' in info_dict
else None)
2866 ffmpeg_fixup(downloader
== 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP
)
2867 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP
)
2868 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP
)
2872 info_dict
= self
.post_process(dl_filename
, info_dict
, files_to_move
)
2873 except PostProcessingError
as err
:
2874 self
.report_error('Postprocessing: %s' % str(err
))
2877 for ph
in self
._post
_hooks
:
2878 ph(info_dict
['filepath'])
2879 except Exception as err
:
2880 self
.report_error('post hooks: %s' % str(err
))
2882 must_record_download_archive
= True
2884 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2885 self
.record_download_archive(info_dict
)
2886 max_downloads
= self
.params
.get('max_downloads')
2887 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2888 raise MaxDownloadsReached()
2890 def download(self
, url_list
):
2891 """Download a given list of URLs."""
2892 outtmpl
= self
.outtmpl_dict
['default']
2893 if (len(url_list
) > 1
2895 and '%' not in outtmpl
2896 and self
.params
.get('max_downloads') != 1):
2897 raise SameFileError(outtmpl
)
2899 for url
in url_list
:
2901 # It also downloads the videos
2902 res
= self
.extract_info(
2903 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
2904 except UnavailableVideoError
:
2905 self
.report_error('unable to download video')
2906 except MaxDownloadsReached
:
2907 self
.to_screen('[info] Maximum number of downloads reached')
2909 except ExistingVideoReached
:
2910 self
.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2912 except RejectedVideoReached
:
2913 self
.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2916 if self
.params
.get('dump_single_json', False):
2917 self
.post_extract(res
)
2918 self
.to_stdout(json
.dumps(self
.sanitize_info(res
)))
2920 return self
._download
_retcode
2922 def download_with_info_file(self
, info_filename
):
2923 with contextlib
.closing(fileinput
.FileInput(
2924 [info_filename
], mode
='r',
2925 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
2926 # FileInput doesn't have a read method, we can't call json.load
2927 info
= self
.sanitize_info(json
.loads('\n'.join(f
)), self
.params
.get('clean_infojson', True))
2929 self
.process_ie_result(info
, download
=True)
2930 except (DownloadError
, EntryNotInPlaylist
, ThrottledDownload
):
2931 webpage_url
= info
.get('webpage_url')
2932 if webpage_url
is not None:
2933 self
.report_warning('The info failed to download, trying with "%s"' % webpage_url
)
2934 return self
.download([webpage_url
])
2937 return self
._download
_retcode
2940 def sanitize_info(info_dict
, remove_private_keys
=False):
2941 ''' Sanitize the infodict for converting to json '''
2942 if info_dict
is None:
2944 info_dict
.setdefault('epoch', int(time
.time()))
2945 remove_keys
= {'__original_infodict'}
# Always remove this since this may contain a copy of the entire dict
2946 keep_keys
= ['_type'], # Always keep this to facilitate load-info-json
2947 if remove_private_keys
:
2949 'requested_formats', 'requested_subtitles', 'requested_entries',
2950 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2952 empty_values
= (None, {}, [], set(), tuple())
2953 reject
= lambda k
, v
: k
not in keep_keys
and (
2954 k
.startswith('_') or k
in remove_keys
or v
in empty_values
)
2956 reject
= lambda k
, v
: k
in remove_keys
2957 filter_fn
= lambda obj
: (
2958 list(map(filter_fn
, obj
)) if isinstance(obj
, (LazyList
, list, tuple, set))
2959 else obj
if not isinstance(obj
, dict)
2960 else dict((k
, filter_fn(v
)) for k
, v
in obj
.items() if not reject(k
, v
)))
2961 return filter_fn(info_dict
)
2964 def filter_requested_info(info_dict
, actually_filter
=True):
2965 ''' Alias of sanitize_info for backward compatibility '''
2966 return YoutubeDL
.sanitize_info(info_dict
, actually_filter
)
2968 def run_pp(self
, pp
, infodict
):
2969 files_to_delete
= []
2970 if '__files_to_move' not in infodict
:
2971 infodict
['__files_to_move'] = {}
2972 files_to_delete
, infodict
= pp
.run(infodict
)
2973 if not files_to_delete
:
2976 if self
.params
.get('keepvideo', False):
2977 for f
in files_to_delete
:
2978 infodict
['__files_to_move'].setdefault(f
, '')
2980 for old_filename
in set(files_to_delete
):
2981 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
2983 os
.remove(encodeFilename(old_filename
))
2984 except (IOError, OSError):
2985 self
.report_warning('Unable to remove downloaded original file')
2986 if old_filename
in infodict
['__files_to_move']:
2987 del infodict
['__files_to_move'][old_filename
]
2991 def post_extract(info_dict
):
2992 def actual_post_extract(info_dict
):
2993 if info_dict
.get('_type') in ('playlist', 'multi_video'):
2994 for video_dict
in info_dict
.get('entries', {}):
2995 actual_post_extract(video_dict
or {})
2998 post_extractor
= info_dict
.get('__post_extractor') or (lambda: {})
2999 extra
= post_extractor().items()
3000 info_dict
.update(extra
)
3001 info_dict
.pop('__post_extractor', None)
3003 original_infodict
= info_dict
.get('__original_infodict') or {}
3004 original_infodict
.update(extra
)
3005 original_infodict
.pop('__post_extractor', None)
3007 actual_post_extract(info_dict
or {})
3009 def pre_process(self
, ie_info
, key
='pre_process', files_to_move
=None):
3010 info
= dict(ie_info
)
3011 info
['__files_to_move'] = files_to_move
or {}
3012 for pp
in self
._pps
[key
]:
3013 info
= self
.run_pp(pp
, info
)
3014 return info
, info
.pop('__files_to_move', None)
3016 def post_process(self
, filename
, ie_info
, files_to_move
=None):
3017 """Run all the postprocessors on the given file."""
3018 info
= dict(ie_info
)
3019 info
['filepath'] = filename
3020 info
['__files_to_move'] = files_to_move
or {}
3022 for pp
in ie_info
.get('__postprocessors', []) + self
._pps
['post_process']:
3023 info
= self
.run_pp(pp
, info
)
3024 info
= self
.run_pp(MoveFilesAfterDownloadPP(self
), info
)
3025 del info
['__files_to_move']
3026 for pp
in self
._pps
['after_move']:
3027 info
= self
.run_pp(pp
, info
)
3030 def _make_archive_id(self
, info_dict
):
3031 video_id
= info_dict
.get('id')
3034 # Future-proof against any change in case
3035 # and backwards compatibility with prior versions
3036 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
3037 if extractor
is None:
3038 url
= str_or_none(info_dict
.get('url'))
3041 # Try to find matching extractor for the URL and take its ie_key
3042 for ie_key
, ie
in self
._ies
.items():
3043 if ie
.suitable(url
):
3048 return '%s %s' % (extractor
.lower(), video_id
)
3050 def in_download_archive(self
, info_dict
):
3051 fn
= self
.params
.get('download_archive')
3055 vid_id
= self
._make
_archive
_id
(info_dict
)
3057 return False # Incomplete video information
3059 return vid_id
in self
.archive
3061 def record_download_archive(self
, info_dict
):
3062 fn
= self
.params
.get('download_archive')
3065 vid_id
= self
._make
_archive
_id
(info_dict
)
3067 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
3068 archive_file
.write(vid_id
+ '\n')
3069 self
.archive
.add(vid_id
)
3072 def format_resolution(format
, default
='unknown'):
3073 if format
.get('vcodec') == 'none':
3074 if format
.get('acodec') == 'none':
3077 if format
.get('resolution') is not None:
3078 return format
['resolution']
3079 if format
.get('width') and format
.get('height'):
3080 res
= '%dx%d' % (format
['width'], format
['height'])
3081 elif format
.get('height'):
3082 res
= '%sp' % format
['height']
3083 elif format
.get('width'):
3084 res
= '%dx?' % format
['width']
3089 def _format_note(self
, fdict
):
3091 if fdict
.get('ext') in ['f4f', 'f4m']:
3092 res
+= '(unsupported) '
3093 if fdict
.get('language'):
3096 res
+= '[%s] ' % fdict
['language']
3097 if fdict
.get('format_note') is not None:
3098 res
+= fdict
['format_note'] + ' '
3099 if fdict
.get('tbr') is not None:
3100 res
+= '%4dk ' % fdict
['tbr']
3101 if fdict
.get('container') is not None:
3104 res
+= '%s container' % fdict
['container']
3105 if (fdict
.get('vcodec') is not None
3106 and fdict
.get('vcodec') != 'none'):
3109 res
+= fdict
['vcodec']
3110 if fdict
.get('vbr') is not None:
3112 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
3114 if fdict
.get('vbr') is not None:
3115 res
+= '%4dk' % fdict
['vbr']
3116 if fdict
.get('fps') is not None:
3119 res
+= '%sfps' % fdict
['fps']
3120 if fdict
.get('acodec') is not None:
3123 if fdict
['acodec'] == 'none':
3126 res
+= '%-5s' % fdict
['acodec']
3127 elif fdict
.get('abr') is not None:
3131 if fdict
.get('abr') is not None:
3132 res
+= '@%3dk' % fdict
['abr']
3133 if fdict
.get('asr') is not None:
3134 res
+= ' (%5dHz)' % fdict
['asr']
3135 if fdict
.get('filesize') is not None:
3138 res
+= format_bytes(fdict
['filesize'])
3139 elif fdict
.get('filesize_approx') is not None:
3142 res
+= '~' + format_bytes(fdict
['filesize_approx'])
3145 def list_formats(self
, info_dict
):
3146 formats
= info_dict
.get('formats', [info_dict
])
3148 'list-formats' not in self
.params
.get('compat_opts', [])
3149 and self
.params
.get('listformats_table', True) is not False)
3153 format_field(f
, 'format_id'),
3154 format_field(f
, 'ext'),
3155 self
.format_resolution(f
),
3156 format_field(f
, 'fps', '%d'),
3158 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
3159 format_field(f
, 'tbr', '%4dk'),
3160 shorten_protocol_name(f
.get('protocol', '').replace("native", "n")),
3162 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
3163 format_field(f
, 'vbr', '%4dk'),
3164 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
3165 format_field(f
, 'abr', '%3dk'),
3166 format_field(f
, 'asr', '%5dHz'),
3167 ', '.join(filter(None, (
3168 'UNSUPPORTED' if f
.get('ext') in ('f4f', 'f4m') else '',
3169 format_field(f
, 'language', '[%s]'),
3170 format_field(f
, 'format_note'),
3171 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
3173 ] for f
in formats
if f
.get('preference') is None or f
['preference'] >= -1000]
3174 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3175 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3179 format_field(f
, 'format_id'),
3180 format_field(f
, 'ext'),
3181 self
.format_resolution(f
),
3182 self
._format
_note
(f
)]
3184 if f
.get('preference') is None or f
['preference'] >= -1000]
3185 header_line
= ['format code', 'extension', 'resolution', 'note']
3188 '[info] Available formats for %s:' % info_dict
['id'])
3189 self
.to_stdout(render_table(
3190 header_line
, table
, delim
=new_format
, extraGap
=(0 if new_format
else 1), hideEmpty
=new_format
))
3192 def list_thumbnails(self
, info_dict
):
3193 thumbnails
= list(info_dict
.get('thumbnails'))
3195 self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id'])
3199 '[info] Thumbnails for %s:' % info_dict
['id'])
3200 self
.to_stdout(render_table(
3201 ['ID', 'width', 'height', 'URL'],
3202 [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
]))
3204 def list_subtitles(self
, video_id
, subtitles
, name
='subtitles'):
3206 self
.to_screen('%s has no %s' % (video_id
, name
))
3209 'Available %s for %s:' % (name
, video_id
))
3211 def _row(lang
, formats
):
3212 exts
, names
= zip(*((f
['ext'], f
.get('name') or 'unknown') for f
in reversed(formats
)))
3213 if len(set(names
)) == 1:
3214 names
= [] if names
[0] == 'unknown' else names
[:1]
3215 return [lang
, ', '.join(names
), ', '.join(exts
)]
3217 self
.to_stdout(render_table(
3218 ['Language', 'Name', 'Formats'],
3219 [_row(lang
, formats
) for lang
, formats
in subtitles
.items()],
3222 def urlopen(self
, req
):
3223 """ Start an HTTP download """
3224 if isinstance(req
, compat_basestring
):
3225 req
= sanitized_Request(req
)
3226 return self
._opener
.open(req
, timeout
=self
._socket
_timeout
)
3228 def print_debug_header(self
):
3229 if not self
.params
.get('verbose'):
3232 stdout_encoding
= getattr(
3233 sys
.stdout
, 'encoding', 'missing (%s)' % type(sys
.stdout
).__name
__)
3235 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3236 locale
.getpreferredencoding(),
3237 sys
.getfilesystemencoding(),
3239 self
.get_encoding()))
3240 write_string(encoding_str
, encoding
=None)
3243 '(exe)' if hasattr(sys
, 'frozen')
3244 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter
)
3245 else '(source)' if os
.path
.basename(sys
.argv
[0]) == '__main__.py'
3247 self
._write
_string
('[debug] yt-dlp version %s %s\n' % (__version__
, source
))
3249 self
._write
_string
('[debug] Lazy loading extractors enabled\n')
3252 '[debug] Plugin Extractors: %s\n' % [ie
.ie_key() for ie
in _PLUGIN_CLASSES
])
3253 if self
.params
.get('compat_opts'):
3255 '[debug] Compatibility options: %s\n' % ', '.join(self
.params
.get('compat_opts')))
3257 sp
= subprocess
.Popen(
3258 ['git', 'rev-parse', '--short', 'HEAD'],
3259 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
,
3260 cwd
=os
.path
.dirname(os
.path
.abspath(__file__
)))
3261 out
, err
= process_communicate_or_kill(sp
)
3262 out
= out
.decode().strip()
3263 if re
.match('[0-9a-f]+', out
):
3264 self
._write
_string
('[debug] Git HEAD: %s\n' % out
)
3271 def python_implementation():
3272 impl_name
= platform
.python_implementation()
3273 if impl_name
== 'PyPy' and hasattr(sys
, 'pypy_version_info'):
3274 return impl_name
+ ' version %d.%d.%d' % sys
.pypy_version_info
[:3]
3277 self
._write
_string
('[debug] Python version %s (%s %s) - %s\n' % (
3278 platform
.python_version(),
3279 python_implementation(),
3280 platform
.architecture()[0],
3283 exe_versions
= FFmpegPostProcessor
.get_versions(self
)
3284 exe_versions
['rtmpdump'] = rtmpdump_version()
3285 exe_versions
['phantomjs'] = PhantomJSwrapper
._version
()
3286 exe_str
= ', '.join(
3287 f
'{exe} {v}' for exe
, v
in sorted(exe_versions
.items()) if v
3289 self
._write
_string
('[debug] exe versions: %s\n' % exe_str
)
3291 from .downloader
.fragment
import can_decrypt_frag
3292 from .downloader
.websocket
import has_websockets
3293 from .postprocessor
.embedthumbnail
import has_mutagen
3294 from .cookies
import SQLITE_AVAILABLE
, KEYRING_AVAILABLE
3296 lib_str
= ', '.join(sorted(filter(None, (
3297 can_decrypt_frag
and 'pycryptodome',
3298 has_websockets
and 'websockets',
3299 has_mutagen
and 'mutagen',
3300 SQLITE_AVAILABLE
and 'sqlite',
3301 KEYRING_AVAILABLE
and 'keyring',
3303 self
._write
_string
('[debug] Optional libraries: %s\n' % lib_str
)
3306 for handler
in self
._opener
.handlers
:
3307 if hasattr(handler
, 'proxies'):
3308 proxy_map
.update(handler
.proxies
)
3309 self
._write
_string
('[debug] Proxy map: ' + compat_str(proxy_map
) + '\n')
3311 if self
.params
.get('call_home', False):
3312 ipaddr
= self
.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3313 self
._write
_string
('[debug] Public IP address: %s\n' % ipaddr
)
3315 latest_version
= self
.urlopen(
3316 'https://yt-dl.org/latest/version').read().decode('utf-8')
3317 if version_tuple(latest_version
) > version_tuple(__version__
):
3318 self
.report_warning(
3319 'You are using an outdated version (newest version: %s)! '
3320 'See https://yt-dl.org/update if you need help updating.' %
3323 def _setup_opener(self
):
3324 timeout_val
= self
.params
.get('socket_timeout')
3325 self
._socket
_timeout
= 600 if timeout_val
is None else float(timeout_val
)
3327 opts_cookiesfrombrowser
= self
.params
.get('cookiesfrombrowser')
3328 opts_cookiefile
= self
.params
.get('cookiefile')
3329 opts_proxy
= self
.params
.get('proxy')
3331 self
.cookiejar
= load_cookies(opts_cookiefile
, opts_cookiesfrombrowser
, self
)
3333 cookie_processor
= YoutubeDLCookieProcessor(self
.cookiejar
)
3334 if opts_proxy
is not None:
3335 if opts_proxy
== '':
3338 proxies
= {'http': opts_proxy, 'https': opts_proxy}
3340 proxies
= compat_urllib_request
.getproxies()
3341 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3342 if 'http' in proxies
and 'https' not in proxies
:
3343 proxies
['https'] = proxies
['http']
3344 proxy_handler
= PerRequestProxyHandler(proxies
)
3346 debuglevel
= 1 if self
.params
.get('debug_printtraffic') else 0
3347 https_handler
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
)
3348 ydlh
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
)
3349 redirect_handler
= YoutubeDLRedirectHandler()
3350 data_handler
= compat_urllib_request_DataHandler()
3352 # When passing our own FileHandler instance, build_opener won't add the
3353 # default FileHandler and allows us to disable the file protocol, which
3354 # can be used for malicious purposes (see
3355 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3356 file_handler
= compat_urllib_request
.FileHandler()
3358 def file_open(*args
, **kwargs
):
3359 raise compat_urllib_error
.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3360 file_handler
.file_open
= file_open
3362 opener
= compat_urllib_request
.build_opener(
3363 proxy_handler
, https_handler
, cookie_processor
, ydlh
, redirect_handler
, data_handler
, file_handler
)
3365 # Delete the default user-agent header, which would otherwise apply in
3366 # cases where our custom HTTP handler doesn't come into play
3367 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3368 opener
.addheaders
= []
3369 self
._opener
= opener
3371 def encode(self
, s
):
3372 if isinstance(s
, bytes):
3373 return s
# Already encoded
3376 return s
.encode(self
.get_encoding())
3377 except UnicodeEncodeError as err
:
3378 err
.reason
= err
.reason
+ '. Check your system encoding configuration or use the --encoding option.'
3381 def get_encoding(self
):
3382 encoding
= self
.params
.get('encoding')
3383 if encoding
is None:
3384 encoding
= preferredencoding()
3387 def _write_thumbnails(self
, info_dict
, filename
): # return the extensions
3388 write_all
= self
.params
.get('write_all_thumbnails', False)
3390 if write_all
or self
.params
.get('writethumbnail', False):
3391 thumbnails
= info_dict
.get('thumbnails') or []
3392 multiple
= write_all
and len(thumbnails
) > 1
3395 for t
in thumbnails
[::-1]:
3396 thumb_ext
= determine_ext(t
['url'], 'jpg')
3397 suffix
= '%s.' % t
['id'] if multiple
else ''
3398 thumb_display_id
= '%s ' % t
['id'] if multiple
else ''
3399 thumb_filename
= replace_extension(filename
, suffix
+ thumb_ext
, info_dict
.get('ext'))
3401 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
3402 ret
.append(suffix
+ thumb_ext
)
3403 t
['filepath'] = thumb_filename
3404 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
3405 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
3407 self
.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3408 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
3410 uf
= self
.urlopen(t
['url'])
3411 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3412 shutil
.copyfileobj(uf
, thumbf
)
3413 ret
.append(suffix
+ thumb_ext
)
3414 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3415 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
3416 t
['filepath'] = thumb_filename
3417 except network_exceptions
as err
:
3418 self
.report_warning('Unable to download thumbnail "%s": %s' %
3419 (t
['url'], error_to_compat_str(err
)))
3420 if ret
and not write_all
: