4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
54 DOT_DESKTOP_LINK_TEMPLATE
,
55 DOT_URL_LINK_TEMPLATE
,
56 DOT_WEBLOC_LINK_TEMPLATE
,
77 PerRequestProxyHandler
,
82 register_socks_protocols
,
95 UnavailableVideoError
,
101 YoutubeDLCookieProcessor
,
103 YoutubeDLRedirectHandler
,
104 process_communicate_or_kill
,
106 from .cache
import Cache
107 from .extractor
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
108 from .extractor
.openload
import PhantomJSwrapper
109 from .downloader
import get_suitable_downloader
110 from .downloader
.rtmp
import rtmpdump_version
111 from .postprocessor
import (
114 FFmpegFixupStretchedPP
,
117 FFmpegSubtitlesConvertorPP
,
120 from .version
import __version__
122 if compat_os_name
== 'nt':
126 class YoutubeDL(object):
129 YoutubeDL objects are the ones responsible of downloading the
130 actual video file and writing it to disk if the user has requested
131 it, among some other tasks. In most cases there should be one per
132 program. As, given a video URL, the downloader doesn't know how to
133 extract all the needed information — a task that InfoExtractors handle — it
134 has to pass the URL to one of them.
136 For this, YoutubeDL objects have a method that allows
137 InfoExtractors to be registered in a given order. When it is passed
138 a URL, the YoutubeDL object hands it to the first InfoExtractor it
139 finds that reports being able to handle it. The InfoExtractor extracts
140 all the information about the video or videos the URL refers to, and
141 YoutubeDL process the extracted information, possibly using a File
142 Downloader to download the video.
144 YoutubeDL objects accept a lot of parameters. In order not to saturate
145 the object constructor with arguments, it receives a dictionary of
146 options instead. These options are available through the params
147 attribute for the InfoExtractors to use. The YoutubeDL also
148 registers itself as the downloader in charge for the InfoExtractors
149 that are added to it, so this is a "mutual registration".
153 username: Username for authentication purposes.
154 password: Password for authentication purposes.
155 videopassword: Password for accessing a video.
156 ap_mso: Adobe Pass multiple-system operator identifier.
157 ap_username: Multiple-system operator account username.
158 ap_password: Multiple-system operator account password.
159 usenetrc: Use netrc for authentication instead.
160 verbose: Print additional info to stdout.
161 quiet: Do not print messages to stdout.
162 no_warnings: Do not print out anything for warnings.
163 forceurl: Force printing final URL.
164 forcetitle: Force printing title.
165 forceid: Force printing ID.
166 forcethumbnail: Force printing thumbnail URL.
167 forcedescription: Force printing description.
168 forcefilename: Force printing final filename.
169 forceduration: Force printing duration.
170 forcejson: Force printing info_dict as JSON.
171 dump_single_json: Force printing the info_dict of the whole playlist
172 (or video) as a single JSON line.
173 force_write_download_archive: Force writing download archive regardless of
174 'skip_download' or 'simulate'.
175 simulate: Do not download the video files.
176 format: Video format code. see "FORMAT SELECTION" for more details.
177 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
178 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
179 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
180 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
181 outtmpl: Template for output names.
182 restrictfilenames: Do not allow "&" and spaces in file names.
183 trim_file_name: Limit length of filename (extension excluded).
184 ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
185 force_generic_extractor: Force downloader to use the generic extractor
186 overwrites: Overwrite all video and metadata files if True,
187 overwrite only non-video files if None
188 and don't overwrite any file if False
189 playliststart: Playlist item to start at.
190 playlistend: Playlist item to end at.
191 playlist_items: Specific indices of playlist to download.
192 playlistreverse: Download playlist items in reverse order.
193 playlistrandom: Download playlist items in random order.
194 matchtitle: Download only matching titles.
195 rejecttitle: Reject downloads for matching titles.
196 logger: Log messages to a logging.Logger instance.
197 logtostderr: Log messages to stderr instead of stdout.
198 writedescription: Write the video description to a .description file
199 writeinfojson: Write the video description to a .info.json file
200 writeannotations: Write the video annotations to a .annotations.xml file
201 writethumbnail: Write the thumbnail image to a file
202 write_all_thumbnails: Write all thumbnail formats to files
203 writelink: Write an internet shortcut file, depending on the
204 current platform (.url/.webloc/.desktop)
205 writeurllink: Write a Windows internet shortcut file (.url)
206 writewebloclink: Write a macOS internet shortcut file (.webloc)
207 writedesktoplink: Write a Linux internet shortcut file (.desktop)
208 writesubtitles: Write the video subtitles to a file
209 writeautomaticsub: Write the automatically generated subtitles to a file
210 allsubtitles: Downloads all the subtitles of the video
211 (requires writesubtitles or writeautomaticsub)
212 listsubtitles: Lists all available subtitles for the video
213 subtitlesformat: The format code for subtitles
214 subtitleslangs: List of languages of the subtitles to download
215 keepvideo: Keep the video file after post-processing
216 daterange: A DateRange object, download only if the upload_date is in the range.
217 skip_download: Skip the actual download of the video file
218 cachedir: Location of the cache files in the filesystem.
219 False to disable filesystem cache.
220 noplaylist: Download single video instead of a playlist if in doubt.
221 age_limit: An integer representing the user's age in years.
222 Unsuitable videos for the given age are skipped.
223 min_views: An integer representing the minimum view count the video
224 must have in order to not be skipped.
225 Videos without view count information are always
226 downloaded. None for no limit.
227 max_views: An integer representing the maximum view count.
228 Videos that are more popular than that are not
230 Videos without view count information are always
231 downloaded. None for no limit.
232 download_archive: File name of a file where all downloads are recorded.
233 Videos already present in the file are not downloaded
235 break_on_existing: Stop the download process after attempting to download a
236 file that is in the archive.
237 break_on_reject: Stop the download process when encountering a video that
238 has been filtered out.
239 cookiefile: File name where cookies should be read from and dumped to
240 nocheckcertificate:Do not verify SSL certificates
241 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
242 At the moment, this is only supported by YouTube.
243 proxy: URL of the proxy server to use
244 geo_verification_proxy: URL of the proxy to use for IP address verification
245 on geo-restricted sites.
246 socket_timeout: Time to wait for unresponsive hosts, in seconds
247 bidi_workaround: Work around buggy terminals without bidirectional text
248 support, using fribidi
249 debug_printtraffic:Print out sent and received HTTP traffic
250 include_ads: Download ads as well
251 default_search: Prepend this string if an input url is not valid.
252 'auto' for elaborate guessing
253 encoding: Use this encoding instead of the system-specified.
254 extract_flat: Do not resolve URLs, return the immediate result.
255 Pass in 'in_playlist' to only show this behavior for
257 postprocessors: A list of dictionaries, each with an entry
258 * key: The name of the postprocessor. See
259 youtube_dlc/postprocessor/__init__.py for a list.
260 as well as any further keyword arguments for the
262 post_hooks: A list of functions that get called as the final step
263 for each video file, after all postprocessors have been
264 called. The filename will be passed as the only argument.
265 progress_hooks: A list of functions that get called on download
266 progress, with a dictionary with the entries
267 * status: One of "downloading", "error", or "finished".
268 Check this first and ignore unknown values.
270 If status is one of "downloading", or "finished", the
271 following properties may also be present:
272 * filename: The final filename (always present)
273 * tmpfilename: The filename we're currently writing to
274 * downloaded_bytes: Bytes on disk
275 * total_bytes: Size of the whole file, None if unknown
276 * total_bytes_estimate: Guess of the eventual file size,
278 * elapsed: The number of seconds since download started.
279 * eta: The estimated time in seconds, None if unknown
280 * speed: The download speed in bytes/second, None if
282 * fragment_index: The counter of the currently
283 downloaded video fragment.
284 * fragment_count: The number of fragments (= individual
285 files that will be merged)
287 Progress hooks are guaranteed to be called at least once
288 (with status "finished") if the download is successful.
289 merge_output_format: Extension to use when merging formats.
290 fixup: Automatically correct known faults of the file.
292 - "never": do nothing
293 - "warn": only emit a warning
294 - "detect_or_warn": check whether we can do anything
295 about it, warn otherwise (default)
296 source_address: Client-side IP address to bind to.
297 call_home: Boolean, true iff we are allowed to contact the
298 youtube-dlc servers for debugging.
299 sleep_interval: Number of seconds to sleep before each download when
300 used alone or a lower bound of a range for randomized
301 sleep before each download (minimum possible number
302 of seconds to sleep) when used along with
304 max_sleep_interval:Upper bound of a range for randomized sleep before each
305 download (maximum possible number of seconds to sleep).
306 Must only be used along with sleep_interval.
307 Actual sleep time will be a random float from range
308 [sleep_interval; max_sleep_interval].
309 listformats: Print an overview of available video formats and exit.
310 list_thumbnails: Print a table of all thumbnails and exit.
311 match_filter: A function that gets called with the info_dict of
313 If it returns a message, the video is ignored.
314 If it returns None, the video is downloaded.
315 match_filter_func in utils.py is one example for this.
316 no_color: Do not emit color codes in output.
317 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
320 Two-letter ISO 3166-2 country code that will be used for
321 explicit geographic restriction bypassing via faking
322 X-Forwarded-For HTTP header
324 IP range in CIDR notation that will be used similarly to
327 The following options determine which downloader is picked:
328 external_downloader: Executable of the external downloader to call.
329 None or unset for standard (built-in) downloader.
330 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
331 if True, otherwise use ffmpeg/avconv if False, otherwise
332 use downloader suggested by extractor if None.
334 The following parameters are not used by YoutubeDL itself, they are used by
335 the downloader (see youtube_dlc/downloader/common.py):
336 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
337 noresizebuffer, retries, continuedl, noprogress, consoletitle,
338 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
341 The following options are used by the post processors:
342 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
343 otherwise prefer ffmpeg.
344 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
345 to the binary or its containing directory.
346 postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
347 of additional command-line arguments for the postprocessor.
348 Use 'default' as the name for arguments to passed to all PP.
350 The following options are used by the Youtube extractor:
351 youtube_include_dash_manifest: If True (default), DASH manifests and related
352 data will be downloaded and processed by extractor.
353 You can reduce network I/O by disabling it if you don't
357 _NUMERIC_FIELDS
= set((
358 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
359 'timestamp', 'upload_year', 'upload_month', 'upload_day',
360 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
361 'average_rating', 'comment_count', 'age_limit',
362 'start_time', 'end_time',
363 'chapter_number', 'season_number', 'episode_number',
364 'track_number', 'disc_number', 'release_year',
371 _download_retcode
= None
372 _num_downloads
= None
374 _playlist_urls
= set()
377 def __init__(self
, params
=None, auto_init
=True):
378 """Create a FileDownloader object with the given options."""
382 self
._ies
_instances
= {}
384 self
._post
_hooks
= []
385 self
._progress
_hooks
= []
386 self
._download
_retcode
= 0
387 self
._num
_downloads
= 0
388 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
389 self
._err
_file
= sys
.stderr
392 'nocheckcertificate': False,
394 self
.params
.update(params
)
395 self
.cache
= Cache(self
)
398 """Preload the archive, if any is specified"""
399 def preload_download_archive(self
):
400 fn
= self
.params
.get('download_archive')
404 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
405 for line
in archive_file
:
406 self
.archive
.add(line
.strip())
407 except IOError as ioe
:
408 if ioe
.errno
!= errno
.ENOENT
:
413 def check_deprecated(param
, option
, suggestion
):
414 if self
.params
.get(param
) is not None:
416 '%s is deprecated. Use %s instead.' % (option
, suggestion
))
420 if self
.params
.get('verbose'):
421 self
.to_stdout('[debug] Loading archive file %r' % self
.params
.get('download_archive'))
423 preload_download_archive(self
)
425 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
426 if self
.params
.get('geo_verification_proxy') is None:
427 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
429 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
430 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
431 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
433 if params
.get('bidi_workaround', False):
436 master
, slave
= pty
.openpty()
437 width
= compat_get_terminal_size().columns
441 width_args
= ['-w', str(width
)]
443 stdin
=subprocess
.PIPE
,
445 stderr
=self
._err
_file
)
447 self
._output
_process
= subprocess
.Popen(
448 ['bidiv'] + width_args
, **sp_kwargs
451 self
._output
_process
= subprocess
.Popen(
452 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
453 self
._output
_channel
= os
.fdopen(master
, 'rb')
454 except OSError as ose
:
455 if ose
.errno
== errno
.ENOENT
:
456 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
460 if (sys
.platform
!= 'win32'
461 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
462 and not params
.get('restrictfilenames', False)):
463 # Unicode filesystem API will throw errors (#1474, #13027)
465 'Assuming --restrict-filenames since file system encoding '
466 'cannot encode all characters. '
467 'Set the LC_ALL environment variable to fix this.')
468 self
.params
['restrictfilenames'] = True
470 if isinstance(params
.get('outtmpl'), bytes):
472 'Parameter outtmpl is bytes, but should be a unicode string. '
473 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
478 self
.print_debug_header()
479 self
.add_default_info_extractors()
481 for pp_def_raw
in self
.params
.get('postprocessors', []):
482 pp_class
= get_postprocessor(pp_def_raw
['key'])
483 pp_def
= dict(pp_def_raw
)
485 pp
= pp_class(self
, **compat_kwargs(pp_def
))
486 self
.add_post_processor(pp
)
488 for ph
in self
.params
.get('post_hooks', []):
489 self
.add_post_hook(ph
)
491 for ph
in self
.params
.get('progress_hooks', []):
492 self
.add_progress_hook(ph
)
494 register_socks_protocols()
496 def warn_if_short_id(self
, argv
):
497 # short YouTube ID starting with dash?
499 i
for i
, a
in enumerate(argv
)
500 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
504 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
505 + ['--'] + [argv
[i
] for i
in idxs
]
508 'Long argument string detected. '
509 'Use -- to separate parameters and URLs, like this:\n%s\n' %
510 args_to_str(correct_argv
))
# NOTE(review): extraction artifact — statements are fragmented across lines and
# carry their original line numbers. Original line 514 (which presumably appends
# `ie` to the list the docstring mentions) is absent from this extract — TODO
# confirm against the full source before relying on this block.
512 def add_info_extractor(self
, ie
):
513 """Add an InfoExtractor object to the end of the list."""
# Only concrete extractor instances (not IE classes) are indexed by their
# ie_key and bound back to this downloader ("mutual registration").
515 if not isinstance(ie
, type):
516 self
._ies
_instances
[ie
.ie_key()] = ie
517 ie
.set_downloader(self
)
519 def get_info_extractor(self
, ie_key
):
521 Get an instance of an IE with name ie_key, it will try to get one from
522 the _ies list, if there's no instance it will create a new one and add
523 it to the extractor list.
525 ie
= self
._ies
_instances
.get(ie_key
)
527 ie
= get_info_extractor(ie_key
)()
528 self
.add_info_extractor(ie
)
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)
# NOTE(review): extraction artifact — original line 540 (presumably the append
# of `pp` onto the postprocessor chain named in the docstring) is absent from
# this extract; only the downloader binding is visible. TODO confirm.
538 def add_post_processor(self
, pp
):
539 """Add a PostProcessor object to the end of the chain."""
541 pp
.set_downloader(self
)
543 def add_post_hook(self
, ph
):
544 """Add the post hook"""
545 self
._post
_hooks
.append(ph
)
547 def add_progress_hook(self
, ph
):
548 """Add the progress hook (currently only for the file downloader)"""
549 self
._progress
_hooks
.append(ph
)
551 def _bidi_workaround(self
, message
):
552 if not hasattr(self
, '_output_channel'):
555 assert hasattr(self
, '_output_process')
556 assert isinstance(message
, compat_str
)
557 line_count
= message
.count('\n') + 1
558 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
559 self
._output
_process
.stdin
.flush()
560 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
561 for _
in range(line_count
))
562 return res
[:-len('\n')]
564 def to_screen(self
, message
, skip_eol
=False):
565 """Print message to stdout if not in quiet mode."""
566 return self
.to_stdout(message
, skip_eol
, check_quiet
=True)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the user-configured output encoding."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
571 def to_stdout(self
, message
, skip_eol
=False, check_quiet
=False):
572 """Print message to stdout if not in quiet mode."""
573 if self
.params
.get('logger'):
574 self
.params
['logger'].debug(message
)
575 elif not check_quiet
or not self
.params
.get('quiet', False):
576 message
= self
._bidi
_workaround
(message
)
577 terminator
= ['\n', ''][skip_eol
]
578 output
= message
+ terminator
580 self
._write
_string
(output
, self
._screen
_file
)
def to_stderr(self, message):
    """Print message to stderr."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
    else:
        # Same bidi treatment as stdout output, but aimed at the error file.
        rendered = self._bidi_workaround(message)
        self._write_string(rendered + '\n', self._err_file)
592 def to_console_title(self
, message
):
593 if not self
.params
.get('consoletitle', False):
595 if compat_os_name
== 'nt':
596 if ctypes
.windll
.kernel32
.GetConsoleWindow():
597 # c_wchar_p() might not be necessary if `message` is
598 # already of type unicode()
599 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
600 elif 'TERM' in os
.environ
:
601 self
._write
_string
('\033[0;%s\007' % message
, self
._screen
_file
)
603 def save_console_title(self
):
604 if not self
.params
.get('consoletitle', False):
606 if self
.params
.get('simulate', False):
608 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
609 # Save the title on stack
610 self
._write
_string
('\033[22;0t', self
._screen
_file
)
612 def restore_console_title(self
):
613 if not self
.params
.get('consoletitle', False):
615 if self
.params
.get('simulate', False):
617 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
618 # Restore the title from stack
619 self
._write
_string
('\033[23;0t', self
._screen
_file
)
622 self
.save_console_title()
625 def __exit__(self
, *args
):
626 self
.restore_console_title()
628 if self
.params
.get('cookiefile') is not None:
629 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
631 def trouble(self
, message
=None, tb
=None):
632 """Determine action to take when a download problem appears.
634 Depending on if the downloader has been configured to ignore
635 download errors or not, this method may throw an exception or
636 not when errors are found, after printing the message.
638 tb, if given, is additional traceback information.
640 if message
is not None:
641 self
.to_stderr(message
)
642 if self
.params
.get('verbose'):
644 if sys
.exc_info()[0]: # if .trouble has been called from an except block
646 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
647 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
648 tb
+= encode_compat_str(traceback
.format_exc())
650 tb_data
= traceback
.format_list(traceback
.extract_stack())
651 tb
= ''.join(tb_data
)
653 if not self
.params
.get('ignoreerrors', False):
654 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
655 exc_info
= sys
.exc_info()[1].exc_info
657 exc_info
= sys
.exc_info()
658 raise DownloadError(message
, exc_info
)
659 self
._download
_retcode
= 1
661 def report_warning(self
, message
):
663 Print the message to stderr, it will be prefixed with 'WARNING:'
664 If stderr is a tty file the 'WARNING:' will be colored
666 if self
.params
.get('logger') is not None:
667 self
.params
['logger'].warning(message
)
669 if self
.params
.get('no_warnings'):
671 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
672 _msg_header
= '\033[0;33mWARNING:\033[0m'
674 _msg_header
= 'WARNING:'
675 warning_message
= '%s %s' % (_msg_header
, message
)
676 self
.to_stderr(warning_message
)
678 def report_error(self
, message
, tb
=None):
680 Do the same as trouble, but prefixes the message with 'ERROR:', colored
681 in red if stderr is a tty file.
683 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
684 _msg_header
= '\033[0;31mERROR:\033[0m'
686 _msg_header
= 'ERROR:'
687 error_message
= '%s %s' % (_msg_header
, message
)
688 self
.trouble(error_message
, tb
)
690 def report_file_already_downloaded(self
, file_name
):
691 """Report file has already been fully downloaded."""
693 self
.to_screen('[download] %s has already been downloaded' % file_name
)
694 except UnicodeEncodeError:
695 self
.to_screen('[download] The file has already been downloaded')
697 def report_file_delete(self
, file_name
):
698 """Report that existing file will be deleted."""
700 self
.to_screen('Deleting already existent file %s' % file_name
)
701 except UnicodeEncodeError:
702 self
.to_screen('Deleting already existent file')
704 def prepare_filename(self
, info_dict
):
705 """Generate the output filename."""
707 template_dict
= dict(info_dict
)
709 template_dict
['epoch'] = int(time
.time())
710 autonumber_size
= self
.params
.get('autonumber_size')
711 if autonumber_size
is None:
713 template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
714 if template_dict
.get('resolution') is None:
715 if template_dict
.get('width') and template_dict
.get('height'):
716 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
717 elif template_dict
.get('height'):
718 template_dict
['resolution'] = '%sp' % template_dict
['height']
719 elif template_dict
.get('width'):
720 template_dict
['resolution'] = '%dx?' % template_dict
['width']
722 sanitize
= lambda k
, v
: sanitize_filename(
724 restricted
=self
.params
.get('restrictfilenames'),
725 is_id
=(k
== 'id' or k
.endswith('_id')))
726 template_dict
= dict((k
, v
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
))
727 for k
, v
in template_dict
.items()
728 if v
is not None and not isinstance(v
, (list, tuple, dict)))
729 template_dict
= collections
.defaultdict(lambda: 'NA', template_dict
)
731 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
733 # For fields playlist_index and autonumber convert all occurrences
734 # of %(field)s to %(field)0Nd for backward compatibility
735 field_size_compat_map
= {
736 'playlist_index': len(str(template_dict
['n_entries'])),
737 'autonumber': autonumber_size
,
739 FIELD_SIZE_COMPAT_RE
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
740 mobj
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
)
743 FIELD_SIZE_COMPAT_RE
,
744 r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')],
747 # Missing numeric fields used together with integer presentation types
748 # in format specification will break the argument substitution since
749 # string 'NA' is returned for missing fields. We will patch output
750 # template for missing fields to meet string presentation type.
751 for numeric_field
in self
._NUMERIC
_FIELDS
:
752 if numeric_field
not in template_dict
:
753 # As of [1] format syntax is:
754 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
755 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
759 \({0}\) # mapping key
760 (?:[#0\-+ ]+)? # conversion flags (optional)
761 (?:\d+)? # minimum field width (optional)
762 (?:\.\d+)? # precision (optional)
763 [hlL]? # length modifier (optional)
764 [diouxXeEfFgGcrs%] # conversion type
767 FORMAT_RE
.format(numeric_field
),
768 r
'%({0})s'.format(numeric_field
), outtmpl
)
770 # expand_path translates '%%' into '%' and '$$' into '$'
771 # correspondingly that is not what we want since we need to keep
772 # '%%' intact for template dict substitution step. Working around
773 # with boundary-alike separator hack.
774 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
775 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
777 # outtmpl should be expand_path'ed before template dict substitution
778 # because meta fields may contain env variables we don't want to
779 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
780 # title "Hello $PATH", we don't want `$PATH` to be expanded.
781 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
783 # https://github.com/blackjack4494/youtube-dlc/issues/85
784 trim_file_name
= self
.params
.get('trim_file_name', False)
786 fn_groups
= filename
.rsplit('.')
789 if len(fn_groups
) > 2:
790 sub_ext
= fn_groups
[-2]
791 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
793 # Temporary fix for #4787
794 # 'Treat' all problem characters by passing filename through preferredencoding
795 # to workaround encoding issues with subprocess on python2 @ Windows
796 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
797 filename
= encodeFilename(filename
, True).decode(preferredencoding())
798 return sanitize_path(filename
)
799 except ValueError as err
:
800 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
803 def _match_entry(self
, info_dict
, incomplete
):
804 """ Returns None if the file should be downloaded """
807 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
808 if 'title' in info_dict
:
809 # This can happen when we're just evaluating the playlist
810 title
= info_dict
['title']
811 matchtitle
= self
.params
.get('matchtitle', False)
813 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
814 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
815 rejecttitle
= self
.params
.get('rejecttitle', False)
817 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
818 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
819 date
= info_dict
.get('upload_date')
821 dateRange
= self
.params
.get('daterange', DateRange())
822 if date
not in dateRange
:
823 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
824 view_count
= info_dict
.get('view_count')
825 if view_count
is not None:
826 min_views
= self
.params
.get('min_views')
827 if min_views
is not None and view_count
< min_views
:
828 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
829 max_views
= self
.params
.get('max_views')
830 if max_views
is not None and view_count
> max_views
:
831 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
832 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
833 return 'Skipping "%s" because it is age restricted' % video_title
834 if self
.in_download_archive(info_dict
):
835 return '%s has already been recorded in archive' % video_title
838 match_filter
= self
.params
.get('match_filter')
839 if match_filter
is not None:
840 ret
= match_filter(info_dict
)
845 reason
= check_filter()
846 if reason
is not None:
847 self
.to_screen('[download] ' + reason
)
848 if reason
.endswith('has already been recorded in the archive') and self
.params
.get('break_on_existing', False):
849 raise ExistingVideoReached()
850 elif self
.params
.get('break_on_reject', False):
851 raise RejectedVideoReached()
855 def add_extra_info(info_dict
, extra_info
):
856 '''Set the keys from extra_info in info dict if they are missing'''
857 for key
, value
in extra_info
.items():
858 info_dict
.setdefault(key
, value
)
860 def extract_info(self
, url
, download
=True, ie_key
=None, info_dict
=None, extra_info
={},
861 process
=True, force_generic_extractor
=False):
863 Returns a list with a dictionary for each video we find.
864 If 'download', also downloads the videos.
865 extra_info is a dict containing the extra values to add to each result
868 if not ie_key
and force_generic_extractor
:
872 ies
= [self
.get_info_extractor(ie_key
)]
877 if not ie
.suitable(url
):
881 ie
= self
.get_info_extractor(ie_key
)
883 self
.report_warning('The program functionality for this site has been marked as broken, '
884 'and will probably not work.')
887 temp_id
= ie
.extract_id(url
) if callable(getattr(ie
, 'extract_id', None)) else ie
._match
_id
(url
)
888 except (AssertionError, IndexError, AttributeError):
890 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
891 self
.to_screen("[%s] %s: has already been recorded in archive" % (
895 return self
.__extract
_info
(url
, ie
, download
, extra_info
, process
, info_dict
)
898 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
# Decorator for extraction entry points: runs `func` and converts the
# known failure types (geo-restriction, extractor errors) into
# self.report_error() calls instead of propagating them.
# NOTE(review): the `try:` line, the `raise` under the pass-through
# except, and the final `return wrapper` are among the original lines
# not visible in this residue.
900 def __handle_extraction_exceptions(func
):
901 def wrapper(self
, *args
, **kwargs
):
903 return func(self
, *args
, **kwargs
)
# Geo-restriction: enrich the message with the list of countries the
# video is available in (ISO 3166 codes expanded to full names).
904 except GeoRestrictedError
as e
:
907 msg
+= '\nThis video is available in %s.' % ', '.join(
908 map(ISO3166Utils
.short2full
, e
.countries
))
909 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
910 self
.report_error(msg
)
911 except ExtractorError
as e
: # An error we somewhat expected
912 self
.report_error(compat_str(e
), e
.format_traceback())
# These control-flow exceptions are presumably re-raised so the caller
# can stop the run -- the raise itself is not visible here.
913 except (MaxDownloadsReached
, ExistingVideoReached
, RejectedVideoReached
):
# Any other exception is only swallowed (reported with traceback) when
# the user asked for ignoreerrors.
915 except Exception as e
:
916 if self
.params
.get('ignoreerrors', False):
917 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
# Run the extractor on `url`, normalise/augment the result, and feed it
# into process_ie_result(). Exceptions are handled by the decorator.
922 @__handle_extraction_exceptions
923 def __extract_info(self
, url
, ie
, download
, extra_info
, process
, info_dict
):
924 ie_result
= ie
.extract(url
)
925 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
# A bare list from the extractor is wrapped as a 'compat_list' result.
927 if isinstance(ie_result
, list):
928 # Backwards compatibility: old IE result format
930 '_type': 'compat_list',
931 'entries': ie_result
,
# Carry over id/title from the embedding info_dict when present
# (only set when the outer dict actually has a truthy value).
934 if info_dict
.get('id'):
935 ie_result
['id'] = info_dict
['id']
936 if info_dict
.get('title'):
937 ie_result
['title'] = info_dict
['title']
# Fill in the standard extractor metadata keys where missing.
938 self
.add_default_extra_info(ie_result
, ie
, url
)
940 return self
.process_ie_result(ie_result
, download
, extra_info
)
# Populate the standard per-result metadata keys (extractor name/key,
# webpage_url basename, formatted duration) -- via add_extra_info, so
# keys the extractor already set are left untouched.
# NOTE(review): a couple of original lines (presumably the key for the
# duration entry and its else-value) are not visible in this residue.
944 def add_default_extra_info(self
, ie_result
, ie
, url
):
945 self
.add_extra_info(ie_result
, {
946 'extractor': ie
.IE_NAME
,
# Human-readable duration; only formatted when a duration is present.
949 formatSeconds(ie_result
['duration'], '-')
950 if ie_result
.get('duration', None) is not None
952 'webpage_url_basename': url_basename(url
),
953 'extractor_key': ie
.ie_key(),
956 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
958 Take the result of the ie(may be modified) and resolve all unresolved
959 references (URLs, playlist items).
961 It will also download the videos if 'download'.
962 Returns the resolved ie_result.
964 result_type
= ie_result
.get('_type', 'video')
966 if result_type
in ('url', 'url_transparent'):
967 ie_result
['url'] = sanitize_url(ie_result
['url'])
968 extract_flat
= self
.params
.get('extract_flat', False)
969 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
970 or extract_flat
is True):
971 self
.__forced
_printings
(
972 ie_result
, self
.prepare_filename(ie_result
),
976 if result_type
== 'video':
977 self
.add_extra_info(ie_result
, extra_info
)
978 return self
.process_video_result(ie_result
, download
=download
)
979 elif result_type
== 'url':
980 # We have to add extra_info to the results because it may be
981 # contained in a playlist
982 return self
.extract_info(ie_result
['url'],
983 download
, info_dict
=ie_result
,
984 ie_key
=ie_result
.get('ie_key'),
985 extra_info
=extra_info
)
986 elif result_type
== 'url_transparent':
987 # Use the information from the embedding page
988 info
= self
.extract_info(
989 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
990 extra_info
=extra_info
, download
=False, process
=False)
992 # extract_info may return None when ignoreerrors is enabled and
993 # extraction failed with an error, don't crash and return early
998 force_properties
= dict(
999 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
1000 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1001 if f
in force_properties
:
1002 del force_properties
[f
]
1003 new_result
= info
.copy()
1004 new_result
.update(force_properties
)
1006 # Extracted info may not be a video result (i.e.
1007 # info.get('_type', 'video') != video) but rather an url or
1008 # url_transparent. In such cases outer metadata (from ie_result)
1009 # should be propagated to inner one (info). For this to happen
1010 # _type of info should be overridden with url_transparent. This
1011 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1012 if new_result
.get('_type') == 'url':
1013 new_result
['_type'] = 'url_transparent'
1015 return self
.process_ie_result(
1016 new_result
, download
=download
, extra_info
=extra_info
)
1017 elif result_type
in ('playlist', 'multi_video'):
1018 # Protect from infinite recursion due to recursively nested playlists
1019 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1020 webpage_url
= ie_result
['webpage_url']
1021 if webpage_url
in self
._playlist
_urls
:
1023 '[download] Skipping already downloaded playlist: %s'
1024 % ie_result
.get('title') or ie_result
.get('id'))
1027 self
._playlist
_level
+= 1
1028 self
._playlist
_urls
.add(webpage_url
)
1030 return self
.__process
_playlist
(ie_result
, download
)
1032 self
._playlist
_level
-= 1
1033 if not self
._playlist
_level
:
1034 self
._playlist
_urls
.clear()
1035 elif result_type
== 'compat_list':
1036 self
.report_warning(
1037 'Extractor %s returned a compat_list result. '
1038 'It needs to be updated.' % ie_result
.get('extractor'))
1041 self
.add_extra_info(
1044 'extractor': ie_result
['extractor'],
1045 'webpage_url': ie_result
['webpage_url'],
1046 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1047 'extractor_key': ie_result
['extractor_key'],
1051 ie_result
['entries'] = [
1052 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1053 for r
in ie_result
['entries']
1057 raise Exception('Invalid result type: %s' % result_type
)
1059 def __process_playlist(self
, ie_result
, download
):
1060 # We process each entry in the playlist
1061 playlist
= ie_result
.get('title') or ie_result
.get('id')
1062 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1064 playlist_results
= []
1066 playliststart
= self
.params
.get('playliststart', 1) - 1
1067 playlistend
= self
.params
.get('playlistend')
1068 # For backwards compatibility, interpret -1 as whole list
1069 if playlistend
== -1:
1072 playlistitems_str
= self
.params
.get('playlist_items')
1073 playlistitems
= None
1074 if playlistitems_str
is not None:
1075 def iter_playlistitems(format
):
1076 for string_segment
in format
.split(','):
1077 if '-' in string_segment
:
1078 start
, end
= string_segment
.split('-')
1079 for item
in range(int(start
), int(end
) + 1):
1082 yield int(string_segment
)
1083 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1085 ie_entries
= ie_result
['entries']
1087 def make_playlistitems_entries(list_ie_entries
):
1088 num_entries
= len(list_ie_entries
)
1090 list_ie_entries
[i
- 1] for i
in playlistitems
1091 if -num_entries
<= i
- 1 < num_entries
]
1093 def report_download(num_entries
):
1095 '[%s] playlist %s: Downloading %d videos' %
1096 (ie_result
['extractor'], playlist
, num_entries
))
1098 if isinstance(ie_entries
, list):
1099 n_all_entries
= len(ie_entries
)
1101 entries
= make_playlistitems_entries(ie_entries
)
1103 entries
= ie_entries
[playliststart
:playlistend
]
1104 n_entries
= len(entries
)
1106 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1107 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
1108 elif isinstance(ie_entries
, PagedList
):
1111 for item
in playlistitems
:
1112 entries
.extend(ie_entries
.getslice(
1116 entries
= ie_entries
.getslice(
1117 playliststart
, playlistend
)
1118 n_entries
= len(entries
)
1119 report_download(n_entries
)
1122 entries
= make_playlistitems_entries(list(itertools
.islice(
1123 ie_entries
, 0, max(playlistitems
))))
1125 entries
= list(itertools
.islice(
1126 ie_entries
, playliststart
, playlistend
))
1127 n_entries
= len(entries
)
1128 report_download(n_entries
)
1130 if self
.params
.get('playlistreverse', False):
1131 entries
= entries
[::-1]
1133 if self
.params
.get('playlistrandom', False):
1134 random
.shuffle(entries
)
1136 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1138 for i
, entry
in enumerate(entries
, 1):
1139 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1140 # This __x_forwarded_for_ip thing is a bit ugly but requires
1143 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1145 'n_entries': n_entries
,
1146 'playlist': playlist
,
1147 'playlist_id': ie_result
.get('id'),
1148 'playlist_title': ie_result
.get('title'),
1149 'playlist_uploader': ie_result
.get('uploader'),
1150 'playlist_uploader_id': ie_result
.get('uploader_id'),
1151 'playlist_index': playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
,
1152 'extractor': ie_result
['extractor'],
1153 'webpage_url': ie_result
['webpage_url'],
1154 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1155 'extractor_key': ie_result
['extractor_key'],
1158 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1161 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1162 # TODO: skip failed (empty) entries?
1163 playlist_results
.append(entry_result
)
1164 ie_result
['entries'] = playlist_results
1165 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
# Thin wrapper around process_ie_result for one playlist entry; exists so
# the exception-handling decorator can catch per-entry failures without
# aborting the whole playlist.
1168 @__handle_extraction_exceptions
1169 def __process_iterable_entry(self
, entry
, download
, extra_info
):
1170 return self
.process_ie_result(
1171 entry
, download
=download
, extra_info
=extra_info
)
# Compile a textual filter_spec (e.g. "height<=720", "ext^=mp4") into a
# predicate over format dicts. Two grammars are tried: a numeric
# comparison and a string comparison; the inner `_filter(f)` closure at
# the bottom applies whichever matched.
# NOTE(review): residue -- the OPERATORS/STR_OPERATORS tables, several
# if/try lines and the final `return _filter` are not visible here.
1173 def _build_format_filter(self
, filter_spec
):
1174 " Returns a function to filter the formats according to the filter_spec "
# Numeric grammar: key, comparison operator, optional '?' meaning
# "include formats where the key is missing", then a number that may
# carry a size suffix (k/M/G/... with optional i and B/b).
1184 operator_rex
= re
.compile(r
'''(?x)\s*
1185 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1186 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1187 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1189 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1190 m
= operator_rex
.search(filter_spec
)
# Plain integer first; on failure the value is parsed as a file size,
# retrying with a 'B' suffix appended before giving up.
1193 comparison_value
= int(m
.group('value'))
1195 comparison_value
= parse_filesize(m
.group('value'))
1196 if comparison_value
is None:
1197 comparison_value
= parse_filesize(m
.group('value') + 'B')
1198 if comparison_value
is None:
1200 'Invalid value %r in format specification %r' % (
1201 m
.group('value'), filter_spec
))
1202 op
= OPERATORS
[m
.group('op')]
# String operators: prefix / suffix / substring match.
1207 '^=': lambda attr
, value
: attr
.startswith(value
),
1208 '$=': lambda attr
, value
: attr
.endswith(value
),
1209 '*=': lambda attr
, value
: value
in attr
,
# String grammar: any dotted key, optional '!' negation, operator,
# optional none-inclusive '?', then the comparison string.
1211 str_operator_rex
= re
.compile(r
'''(?x)
1212 \s*(?P<key>[a-zA-Z0-9._-]+)
1213 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1214 \s*(?P<value>[a-zA-Z0-9._-]+)
1216 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1217 m
= str_operator_rex
.search(filter_spec
)
1219 comparison_value
= m
.group('value')
1220 str_op
= STR_OPERATORS
[m
.group('op')]
# '!' inverts the selected string operator.
1221 if m
.group('negation'):
1222 op
= lambda attr
, value
: not str_op(attr
, value
)
# Neither grammar matched anywhere in the spec.
1227 raise ValueError('Invalid filter specification %r' % filter_spec
)
# The actual predicate: a missing key passes only when '?' was given
# (m.group('none_inclusive') is truthy then).
1230 actual_value
= f
.get(m
.group('key'))
1231 if actual_value
is None:
1232 return m
.group('none_inclusive')
1233 return op(actual_value
, comparison_value
)
# Pick the default --format string: variants of 'best...' depending on
# whether ffmpeg can merge streams, whether output is streamed to
# stdout/live, and whether multiple audio streams are allowed.
# NOTE(review): residue -- the inner helper's def line and the
# surrounding conditional scaffolding are among the missing lines.
1236 def _default_format_spec(self
, info_dict
, download
=True):
# Merging is only possible when the ffmpeg merger is both installed
# and able to merge (container support).
1239 merger
= FFmpegMergerPP(self
)
1240 return merger
.available
and merger
.can_merge()
# Conditions under which merging is pointless/impossible: simulation,
# live streams, or output going to stdout ('-').
1243 not self
.params
.get('simulate', False)
1247 or info_dict
.get('is_live', False)
1248 or self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) == '-'))
1251 'best/bestvideo+bestaudio'
1253 else 'bestvideo*+bestaudio/best'
1254 if not self
.params
.get('allow_multiple_audio_streams', False)
1255 else 'bestvideo+bestaudio/best')
1257 def build_format_selector(self
, format_spec
):
1258 def syntax_error(note
, start
):
1260 'Invalid format specification: '
1261 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1262 return SyntaxError(message
)
1264 PICKFIRST
= 'PICKFIRST'
1268 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1270 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1271 'video': self
.params
.get('allow_multiple_video_streams', False)}
1273 def _parse_filter(tokens
):
1275 for type, string
, start
, _
, _
in tokens
:
1276 if type == tokenize
.OP
and string
== ']':
1277 return ''.join(filter_parts
)
1279 filter_parts
.append(string
)
1281 def _remove_unused_ops(tokens
):
1282 # Remove operators that we don't use and join them with the surrounding strings
1283 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1284 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1285 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1286 for type, string
, start
, end
, line
in tokens
:
1287 if type == tokenize
.OP
and string
== '[':
1289 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1291 yield type, string
, start
, end
, line
1292 # everything inside brackets will be handled by _parse_filter
1293 for type, string
, start
, end
, line
in tokens
:
1294 yield type, string
, start
, end
, line
1295 if type == tokenize
.OP
and string
== ']':
1297 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1299 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1301 yield type, string
, start
, end
, line
1302 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1304 last_string
= string
1308 last_string
+= string
1310 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1312 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1314 current_selector
= None
1315 for type, string
, start
, _
, _
in tokens
:
1316 # ENCODING is only defined in python 3.x
1317 if type == getattr(tokenize
, 'ENCODING', None):
1319 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1320 current_selector
= FormatSelector(SINGLE
, string
, [])
1321 elif type == tokenize
.OP
:
1323 if not inside_group
:
1324 # ')' will be handled by the parentheses group
1325 tokens
.restore_last_token()
1327 elif inside_merge
and string
in ['/', ',']:
1328 tokens
.restore_last_token()
1330 elif inside_choice
and string
== ',':
1331 tokens
.restore_last_token()
1334 if not current_selector
:
1335 raise syntax_error('"," must follow a format selector', start
)
1336 selectors
.append(current_selector
)
1337 current_selector
= None
1339 if not current_selector
:
1340 raise syntax_error('"/" must follow a format selector', start
)
1341 first_choice
= current_selector
1342 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1343 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1345 if not current_selector
:
1346 current_selector
= FormatSelector(SINGLE
, 'best', [])
1347 format_filter
= _parse_filter(tokens
)
1348 current_selector
.filters
.append(format_filter
)
1350 if current_selector
:
1351 raise syntax_error('Unexpected "("', start
)
1352 group
= _parse_format_selection(tokens
, inside_group
=True)
1353 current_selector
= FormatSelector(GROUP
, group
, [])
1355 if not current_selector
:
1356 raise syntax_error('Unexpected "+"', start
)
1357 selector_1
= current_selector
1358 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1360 raise syntax_error('Expected a selector', start
)
1361 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1363 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1364 elif type == tokenize
.ENDMARKER
:
1366 if current_selector
:
1367 selectors
.append(current_selector
)
1370 def _build_selector_function(selector
):
1371 if isinstance(selector
, list): # ,
1372 fs
= [_build_selector_function(s
) for s
in selector
]
1374 def selector_function(ctx
):
1376 for format
in f(ctx
):
1378 return selector_function
1380 elif selector
.type == GROUP
: # ()
1381 selector_function
= _build_selector_function(selector
.selector
)
1383 elif selector
.type == PICKFIRST
: # /
1384 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1386 def selector_function(ctx
):
1388 picked_formats
= list(f(ctx
))
1390 return picked_formats
1393 elif selector
.type == SINGLE
: # atom
1394 format_spec
= selector
.selector
if selector
.selector
is not None else 'best'
1396 if format_spec
== 'all':
1397 def selector_function(ctx
):
1398 formats
= list(ctx
['formats'])
1404 format_fallback
= False
1405 format_spec_obj
= re
.match(r
'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec
)
1406 if format_spec_obj
is not None:
1407 format_idx
= 0 if format_spec_obj
.group(1)[0] == 'w' else -1
1408 format_type
= format_spec_obj
.group(2)[0] if format_spec_obj
.group(2) else False
1409 not_format_type
= 'v' if format_type
== 'a' else 'a'
1410 format_modified
= format_spec_obj
.group(3) is not None
1412 format_fallback
= not format_type
and not format_modified
# for b, w
1413 filter_f
= ((lambda f
: f
.get(format_type
+ 'codec') != 'none')
1414 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1415 else (lambda f
: f
.get(not_format_type
+ 'codec') == 'none')
1416 if format_type
# bv, ba, wv, wa
1417 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1418 if not format_modified
# b, w
1422 filter_f
= ((lambda f
: f
.get('ext') == format_spec
)
1423 if format_spec
in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1424 else (lambda f
: f
.get('format_id') == format_spec
)) # id
1426 def selector_function(ctx
):
1427 formats
= list(ctx
['formats'])
1430 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
1432 yield matches
[format_idx
]
1433 elif format_fallback
== 'force' or (format_fallback
and ctx
['incomplete_formats']):
1434 # for extractors with incomplete formats (audio only (soundcloud)
1435 # or video only (imgur)) best/worst will fallback to
1436 # best/worst {video,audio}-only format
1437 yield formats
[format_idx
]
1439 elif selector
.type == MERGE
: # +
1440 def _merge(formats_pair
):
1441 format_1
, format_2
= formats_pair
1444 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1445 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1447 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1448 get_no_more
= {"video": False, "audio": False}
1449 for (i
, fmt_info
) in enumerate(formats_info
):
1450 for aud_vid
in ["audio", "video"]:
1451 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1452 if get_no_more
[aud_vid
]:
1454 get_no_more
[aud_vid
] = True
1456 if len(formats_info
) == 1:
1457 return formats_info
[0]
1459 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1460 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1462 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1463 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1465 output_ext
= self
.params
.get('merge_output_format')
1468 output_ext
= the_only_video
['ext']
1469 elif the_only_audio
and not video_fmts
:
1470 output_ext
= the_only_audio
['ext']
1475 'requested_formats': formats_info
,
1476 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1477 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1483 'width': the_only_video
.get('width'),
1484 'height': the_only_video
.get('height'),
1485 'resolution': the_only_video
.get('resolution'),
1486 'fps': the_only_video
.get('fps'),
1487 'vcodec': the_only_video
.get('vcodec'),
1488 'vbr': the_only_video
.get('vbr'),
1489 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1494 'acodec': the_only_audio
.get('acodec'),
1495 'abr': the_only_audio
.get('abr'),
1500 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1502 def selector_function(ctx
):
1503 for pair
in itertools
.product(
1504 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1507 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1509 def final_selector(ctx
):
1510 ctx_copy
= copy
.deepcopy(ctx
)
1511 for _filter
in filters
:
1512 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
1513 return selector_function(ctx_copy
)
1514 return final_selector
1516 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1518 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1519 except tokenize
.TokenError
:
1520 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1522 class TokenIterator(object):
1523 def __init__(self
, tokens
):
1524 self
.tokens
= tokens
1531 if self
.counter
>= len(self
.tokens
):
1532 raise StopIteration()
1533 value
= self
.tokens
[self
.counter
]
1539 def restore_last_token(self
):
1542 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1543 return _build_selector_function(parsed_selector
)
# Build the effective HTTP headers for a format/info dict: start from the
# global std_headers, overlay the extractor-supplied http_headers, then
# add Cookie and X-Forwarded-For where applicable.
# NOTE(review): the `return res` line (and a few guard lines) are not
# visible in this residue.
1545 def _calc_headers(self
, info_dict
):
1546 res
= std_headers
.copy()
# Extractor-provided headers win over the defaults.
1548 add_headers
= info_dict
.get('http_headers')
1550 res
.update(add_headers
)
# Cookies matching this URL, computed from the cookiejar.
1552 cookies
= self
._calc
_cookies
(info_dict
)
1554 res
['Cookie'] = cookies
# Only inject the spoofed client IP when the extractor did not already
# set an X-Forwarded-For header.
1556 if 'X-Forwarded-For' not in res
:
1557 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
1558 if x_forwarded_for_ip
:
1559 res
['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would send for this URL."""
    # Build a throwaway request for the target URL, let the jar stamp its
    # matching cookies onto it, then read the resulting header back out.
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
1568 def process_video_result(self
, info_dict
, download
=True):
1569 assert info_dict
.get('_type', 'video') == 'video'
1571 if 'id' not in info_dict
:
1572 raise ExtractorError('Missing "id" field in extractor result')
1573 if 'title' not in info_dict
:
1574 raise ExtractorError('Missing "title" field in extractor result')
1576 def report_force_conversion(field
, field_not
, conversion
):
1577 self
.report_warning(
1578 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1579 % (field
, field_not
, conversion
))
1581 def sanitize_string_field(info
, string_field
):
1582 field
= info
.get(string_field
)
1583 if field
is None or isinstance(field
, compat_str
):
1585 report_force_conversion(string_field
, 'a string', 'string')
1586 info
[string_field
] = compat_str(field
)
1588 def sanitize_numeric_fields(info
):
1589 for numeric_field
in self
._NUMERIC
_FIELDS
:
1590 field
= info
.get(numeric_field
)
1591 if field
is None or isinstance(field
, compat_numeric_types
):
1593 report_force_conversion(numeric_field
, 'numeric', 'int')
1594 info
[numeric_field
] = int_or_none(field
)
1596 sanitize_string_field(info_dict
, 'id')
1597 sanitize_numeric_fields(info_dict
)
1599 if 'playlist' not in info_dict
:
1600 # It isn't part of a playlist
1601 info_dict
['playlist'] = None
1602 info_dict
['playlist_index'] = None
1604 thumbnails
= info_dict
.get('thumbnails')
1605 if thumbnails
is None:
1606 thumbnail
= info_dict
.get('thumbnail')
1608 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1610 thumbnails
.sort(key
=lambda t
: (
1611 t
.get('preference') if t
.get('preference') is not None else -1,
1612 t
.get('width') if t
.get('width') is not None else -1,
1613 t
.get('height') if t
.get('height') is not None else -1,
1614 t
.get('id') if t
.get('id') is not None else '', t
.get('url')))
1615 for i
, t
in enumerate(thumbnails
):
1616 t
['url'] = sanitize_url(t
['url'])
1617 if t
.get('width') and t
.get('height'):
1618 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1619 if t
.get('id') is None:
1622 if self
.params
.get('list_thumbnails'):
1623 self
.list_thumbnails(info_dict
)
1626 thumbnail
= info_dict
.get('thumbnail')
1628 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1630 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1632 if 'display_id' not in info_dict
and 'id' in info_dict
:
1633 info_dict
['display_id'] = info_dict
['id']
1635 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1636 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1637 # see http://bugs.python.org/issue1646728)
1639 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1640 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1641 except (ValueError, OverflowError, OSError):
1644 # Auto generate title fields corresponding to the *_number fields when missing
1645 # in order to always have clean titles. This is very common for TV series.
1646 for field
in ('chapter', 'season', 'episode'):
1647 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
1648 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
1650 for cc_kind
in ('subtitles', 'automatic_captions'):
1651 cc
= info_dict
.get(cc_kind
)
1653 for _
, subtitle
in cc
.items():
1654 for subtitle_format
in subtitle
:
1655 if subtitle_format
.get('url'):
1656 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
1657 if subtitle_format
.get('ext') is None:
1658 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1660 automatic_captions
= info_dict
.get('automatic_captions')
1661 subtitles
= info_dict
.get('subtitles')
1663 if self
.params
.get('listsubtitles', False):
1664 if 'automatic_captions' in info_dict
:
1665 self
.list_subtitles(
1666 info_dict
['id'], automatic_captions
, 'automatic captions')
1667 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1670 info_dict
['requested_subtitles'] = self
.process_subtitles(
1671 info_dict
['id'], subtitles
, automatic_captions
)
1673 # We now pick which formats have to be downloaded
1674 if info_dict
.get('formats') is None:
1675 # There's only one format available
1676 formats
= [info_dict
]
1678 formats
= info_dict
['formats']
1681 raise ExtractorError('No video formats found!')
1683 def is_wellformed(f
):
1686 self
.report_warning(
1687 '"url" field is missing or empty - skipping format, '
1688 'there is an error in extractor')
1690 if isinstance(url
, bytes):
1691 sanitize_string_field(f
, 'url')
1694 # Filter out malformed formats for better extraction robustness
1695 formats
= list(filter(is_wellformed
, formats
))
1699 # We check that all the formats have the format and format_id fields
1700 for i
, format
in enumerate(formats
):
1701 sanitize_string_field(format
, 'format_id')
1702 sanitize_numeric_fields(format
)
1703 format
['url'] = sanitize_url(format
['url'])
1704 if not format
.get('format_id'):
1705 format
['format_id'] = compat_str(i
)
1707 # Sanitize format_id from characters used in format selector expression
1708 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
1709 format_id
= format
['format_id']
1710 if format_id
not in formats_dict
:
1711 formats_dict
[format_id
] = []
1712 formats_dict
[format_id
].append(format
)
1714 # Make sure all formats have unique format_id
1715 for format_id
, ambiguous_formats
in formats_dict
.items():
1716 if len(ambiguous_formats
) > 1:
1717 for i
, format
in enumerate(ambiguous_formats
):
1718 format
['format_id'] = '%s-%d' % (format_id
, i
)
1720 for i
, format
in enumerate(formats
):
1721 if format
.get('format') is None:
1722 format
['format'] = '{id} - {res}{note}'.format(
1723 id=format
['format_id'],
1724 res
=self
.format_resolution(format
),
1725 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1727 # Automatically determine file extension if missing
1728 if format
.get('ext') is None:
1729 format
['ext'] = determine_ext(format
['url']).lower()
1730 # Automatically determine protocol if missing (useful for format
1731 # selection purposes)
1732 if format
.get('protocol') is None:
1733 format
['protocol'] = determine_protocol(format
)
1734 # Add HTTP headers, so that external programs can use them from the
1736 full_format_info
= info_dict
.copy()
1737 full_format_info
.update(format
)
1738 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1739 # Remove private housekeeping stuff
1740 if '__x_forwarded_for_ip' in info_dict
:
1741 del info_dict
['__x_forwarded_for_ip']
1743 # TODO Central sorting goes here
1745 if formats
[0] is not info_dict
:
1746 # only set the 'formats' fields if the original info_dict list them
1747 # otherwise we end up with a circular reference, the first (and unique)
1748 # element in the 'formats' field in info_dict is info_dict itself,
1749 # which can't be exported to json
1750 info_dict
['formats'] = formats
1751 if self
.params
.get('listformats'):
1752 self
.list_formats(info_dict
)
1755 req_format
= self
.params
.get('format')
1756 if req_format
is None:
1757 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
1758 if self
.params
.get('verbose'):
1759 self
._write
_string
('[debug] Default format spec: %s\n' % req_format
)
1761 format_selector
= self
.build_format_selector(req_format
)
1763 # While in format selection we may need to have an access to the original
1764 # format set in order to calculate some metrics or do some processing.
1765 # For now we need to be able to guess whether original formats provided
1766 # by extractor are incomplete or not (i.e. whether extractor provides only
1767 # video-only or audio-only formats) for proper formats selection for
1768 # extractors with such incomplete formats (see
1769 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1770 # Since formats may be filtered during format selection and may not match
1771 # the original formats the results may be incorrect. Thus original formats
1772 # or pre-calculated metrics should be passed to format selection routines
1774 # We will pass a context object containing all necessary additional data
1775 # instead of just formats.
1776 # This fixes incorrect format selection issue (see
1777 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1778 incomplete_formats
= (
1779 # All formats are video-only or
1780 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
1781 # all formats are audio-only
1782 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
1786 'incomplete_formats': incomplete_formats
,
1789 formats_to_download
= list(format_selector(ctx
))
1790 if not formats_to_download
:
1791 raise ExtractorError('requested format not available',
1795 self
.to_screen('[info] Downloading format(s) %s' % ", ".join([f
['format_id'] for f
in formats_to_download
]))
1796 if len(formats_to_download
) > 1:
1797 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1798 for format
in formats_to_download
:
1799 new_info
= dict(info_dict
)
1800 new_info
.update(format
)
1801 self
.process_info(new_info
)
1802 # We update the info dict with the best quality format (backwards compatibility)
1803 info_dict
.update(formats_to_download
[-1])
# Choose which subtitle languages and which format per language to
# download, honouring writesubtitles / writeautomaticsub / allsubtitles /
# subtitleslangs / subtitlesformat params.
# NOTE(review): residue -- the available_subs initialisation, the early
# return, the else-branches and the final result assembly are among the
# original lines not visible here.
1806 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
1807 """Select the requested subtitles and their format"""
# Normal subtitles are collected first; automatic captions only fill
# languages that have no normal subtitles.
1809 if normal_subtitles
and self
.params
.get('writesubtitles'):
1810 available_subs
.update(normal_subtitles
)
1811 if automatic_captions
and self
.params
.get('writeautomaticsub'):
1812 for lang
, cap_info
in automatic_captions
.items():
1813 if lang
not in available_subs
:
1814 available_subs
[lang
] = cap_info
# Nothing requested (or nothing available): bail out early --
# the rest of this condition is not visible here.
1816 if (not self
.params
.get('writesubtitles') and not
1817 self
.params
.get('writeautomaticsub') or not
# Language selection: all of them, an explicit list, English if
# present, else the first available language.
1821 if self
.params
.get('allsubtitles', False):
1822 requested_langs
= available_subs
.keys()
1824 if self
.params
.get('subtitleslangs', False):
1825 requested_langs
= self
.params
.get('subtitleslangs')
1826 elif 'en' in available_subs
:
1827 requested_langs
= ['en']
1829 requested_langs
= [list(available_subs
.keys())[0]]
# subtitlesformat is a '/'-separated preference list, e.g. 'srt/best'.
1831 formats_query
= self
.params
.get('subtitlesformat', 'best')
1832 formats_preference
= formats_query
.split('/') if formats_query
else []
1834 for lang
in requested_langs
:
1835 formats
= available_subs
.get(lang
)
1837 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
# Walk the preference list and pick the first ext that matches.
1839 for ext
in formats_preference
:
1843 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
# No preferred ext matched: warn and fall back to some format `f`
# (the fallback assignment itself is not visible here).
1849 self
.report_warning(
1850 'No subtitle format found matching "%s" for language %s, '
1851 'using %s' % (formats_query
, lang
, f
['ext']))
# Emit the fields requested via --force* / --print-style params
# (forcetitle, forceid, forceurl, forcejson, ...) to stdout.
# `incomplete` marks info dicts that may lack fields (pre-download):
# mandatory fields are then only printed when actually present, and
# URL/filename printing is suppressed entirely.
1855 def __forced_printings(self
, info_dict
, filename
, incomplete
):
# Print a field that normally must exist; tolerate absence only for
# incomplete dicts.
1856 def print_mandatory(field
):
1857 if (self
.params
.get('force%s' % field
, False)
1858 and (not incomplete
or info_dict
.get(field
) is not None)):
1859 self
.to_stdout(info_dict
[field
])
# Print a field only when it is present, regardless of completeness.
1861 def print_optional(field
):
1862 if (self
.params
.get('force%s' % field
, False)
1863 and info_dict
.get(field
) is not None):
1864 self
.to_stdout(info_dict
[field
])
1866 print_mandatory('title')
1867 print_mandatory('id')
# forceurl: one line per requested format when formats were merged,
# otherwise the single URL (plus RTMP playpath when present).
1868 if self
.params
.get('forceurl', False) and not incomplete
:
1869 if info_dict
.get('requested_formats') is not None:
1870 for f
in info_dict
['requested_formats']:
1871 self
.to_stdout(f
['url'] + f
.get('play_path', ''))
1873 # For RTMP URLs, also include the playpath
1874 self
.to_stdout(info_dict
['url'] + info_dict
.get('play_path', ''))
1875 print_optional('thumbnail')
1876 print_optional('description')
1877 if self
.params
.get('forcefilename', False) and filename
is not None:
1878 self
.to_stdout(filename
)
1879 if self
.params
.get('forceduration', False) and info_dict
.get('duration') is not None:
1880 self
.to_stdout(formatSeconds(info_dict
['duration']))
1881 print_mandatory('format')
# forcejson dumps the whole info dict as a single JSON line.
1882 if self
.params
.get('forcejson', False):
1883 self
.to_stdout(json
.dumps(info_dict
))
1885 def process_info(self
, info_dict
):
1886 """Process a single resolved IE result."""
1888 assert info_dict
.get('_type', 'video') == 'video'
1890 max_downloads
= self
.params
.get('max_downloads')
1891 if max_downloads
is not None:
1892 if self
._num
_downloads
>= int(max_downloads
):
1893 raise MaxDownloadsReached()
1895 # TODO: backward compatibility, to be removed
1896 info_dict
['fulltitle'] = info_dict
['title']
1898 if 'format' not in info_dict
:
1899 info_dict
['format'] = info_dict
['ext']
1901 if self
._match
_entry
(info_dict
, incomplete
=False) is not None:
1904 self
._num
_downloads
+= 1
1906 info_dict
['_filename'] = filename
= self
.prepare_filename(info_dict
)
1909 self
.__forced
_printings
(info_dict
, filename
, incomplete
=False)
1911 if self
.params
.get('simulate', False):
1912 if self
.params
.get('force_write_download_archive', False):
1913 self
.record_download_archive(info_dict
)
1915 # Do nothing else if in simulate mode
1918 if filename
is None:
1921 def ensure_dir_exists(path
):
1923 dn
= os
.path
.dirname(path
)
1924 if dn
and not os
.path
.exists(dn
):
1927 except (OSError, IOError) as err
:
1928 self
.report_error('unable to create directory ' + error_to_compat_str(err
))
1931 if not ensure_dir_exists(sanitize_path(encodeFilename(filename
))):
1934 if self
.params
.get('writedescription', False):
1935 descfn
= replace_extension(filename
, 'description', info_dict
.get('ext'))
1936 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
1937 self
.to_screen('[info] Video description is already present')
1938 elif info_dict
.get('description') is None:
1939 self
.report_warning('There\'s no description to write.')
1942 self
.to_screen('[info] Writing video description to: ' + descfn
)
1943 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1944 descfile
.write(info_dict
['description'])
1945 except (OSError, IOError):
1946 self
.report_error('Cannot write description file ' + descfn
)
1949 if self
.params
.get('writeannotations', False):
1950 annofn
= replace_extension(filename
, 'annotations.xml', info_dict
.get('ext'))
1951 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
1952 self
.to_screen('[info] Video annotations are already present')
1953 elif not info_dict
.get('annotations'):
1954 self
.report_warning('There are no annotations to write.')
1957 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
1958 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
1959 annofile
.write(info_dict
['annotations'])
1960 except (KeyError, TypeError):
1961 self
.report_warning('There are no annotations to write.')
1962 except (OSError, IOError):
1963 self
.report_error('Cannot write annotations file: ' + annofn
)
1966 def dl(name
, info
, subtitle
=False):
1967 fd
= get_suitable_downloader(info
, self
.params
)(self
, self
.params
)
1968 for ph
in self
._progress
_hooks
:
1969 fd
.add_progress_hook(ph
)
1970 if self
.params
.get('verbose'):
1971 self
.to_screen('[debug] Invoking downloader on %r' % info
.get('url'))
1972 return fd
.download(name
, info
, subtitle
)
1974 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
1975 self
.params
.get('writeautomaticsub')])
1977 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
1978 # subtitles download errors are already managed as troubles in relevant IE
1979 # that way it will silently go on when used with unsupporting IE
1980 subtitles
= info_dict
['requested_subtitles']
1981 # ie = self.get_info_extractor(info_dict['extractor_key'])
1982 for sub_lang
, sub_info
in subtitles
.items():
1983 sub_format
= sub_info
['ext']
1984 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
1985 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
1986 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
1988 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
1989 if sub_info
.get('data') is not None:
1991 # Use newline='' to prevent conversion of newline characters
1992 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1993 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
1994 subfile
.write(sub_info
['data'])
1995 except (OSError, IOError):
1996 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2000 dl(sub_filename
, sub_info
, subtitle
=True)
2002 if self.params.get('sleep_interval_subtitles', False):
2003 dl(sub_filename, sub_info)
2005 sub_data = ie._request_webpage(
2006 sub_info['url'], info_dict['id'], note=False).read()
2007 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2008 subfile.write(sub_data)
2010 except (ExtractorError
, IOError, OSError, ValueError, compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2011 self
.report_warning('Unable to download subtitle for "%s": %s' %
2012 (sub_lang
, error_to_compat_str(err
)))
2015 if self
.params
.get('skip_download', False):
2016 if self
.params
.get('convertsubtitles', False):
2017 subconv
= FFmpegSubtitlesConvertorPP(self
, format
=self
.params
.get('convertsubtitles'))
2018 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2020 os
.path
.splitext(filename
)[0]
2021 if filename_real_ext
== info_dict
['ext']
2023 afilename
= '%s.%s' % (filename_wo_ext
, self
.params
.get('convertsubtitles'))
2024 if subconv
.available
:
2025 info_dict
.setdefault('__postprocessors', [])
2026 # info_dict['__postprocessors'].append(subconv)
2027 if os
.path
.exists(encodeFilename(afilename
)):
2029 '[download] %s has already been downloaded and '
2030 'converted' % afilename
)
2033 self
.post_process(filename
, info_dict
)
2034 except (PostProcessingError
) as err
:
2035 self
.report_error('postprocessing: %s' % str(err
))
2038 if self
.params
.get('writeinfojson', False):
2039 infofn
= replace_extension(filename
, 'info.json', info_dict
.get('ext'))
2040 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2041 self
.to_screen('[info] Video description metadata is already present')
2043 self
.to_screen('[info] Writing video description metadata as JSON to: ' + infofn
)
2045 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
2046 except (OSError, IOError):
2047 self
.report_error('Cannot write metadata to JSON file ' + infofn
)
2050 self
._write
_thumbnails
(info_dict
, filename
)
2052 # Write internet shortcut files
2053 url_link
= webloc_link
= desktop_link
= False
2054 if self
.params
.get('writelink', False):
2055 if sys
.platform
== "darwin": # macOS.
2057 elif sys
.platform
.startswith("linux"):
2059 else: # if sys.platform in ['win32', 'cygwin']:
2061 if self
.params
.get('writeurllink', False):
2063 if self
.params
.get('writewebloclink', False):
2065 if self
.params
.get('writedesktoplink', False):
2068 if url_link
or webloc_link
or desktop_link
:
2069 if 'webpage_url' not in info_dict
:
2070 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2072 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2074 def _write_link_file(extension
, template
, newline
, embed_filename
):
2075 linkfn
= replace_extension(filename
, extension
, info_dict
.get('ext'))
2076 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(linkfn
)):
2077 self
.to_screen('[info] Internet shortcut is already present')
2080 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2081 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2082 template_vars
= {'url': ascii_url}
2084 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2085 linkfile
.write(template
% template_vars
)
2086 except (OSError, IOError):
2087 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2092 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2095 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2098 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2102 must_record_download_archive
= False
2103 if not self
.params
.get('skip_download', False):
2105 if info_dict
.get('requested_formats') is not None:
2108 merger
= FFmpegMergerPP(self
)
2109 if not merger
.available
:
2111 self
.report_warning('You have requested multiple '
2112 'formats but ffmpeg or avconv are not installed.'
2113 ' The formats won\'t be merged.')
2115 postprocessors
= [merger
]
2117 def compatible_formats(formats
):
2118 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2119 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2120 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2121 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2125 exts
= set(format
.get('ext') for format
in formats
)
2127 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2130 for ext_sets
in COMPATIBLE_EXTS
:
2131 if ext_sets
.issuperset(exts
):
2133 # TODO: Check acodec/vcodec
2136 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2138 os
.path
.splitext(filename
)[0]
2139 if filename_real_ext
== info_dict
['ext']
2141 requested_formats
= info_dict
['requested_formats']
2142 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2143 info_dict
['ext'] = 'mkv'
2144 self
.report_warning(
2145 'Requested formats are incompatible for merge and will be merged into mkv.')
2146 # Ensure filename always has a correct extension for successful merge
2147 filename
= '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2148 file_exists
= os
.path
.exists(encodeFilename(filename
))
2149 if not self
.params
.get('overwrites', False) and file_exists
:
2151 '[download] %s has already been downloaded and '
2152 'merged' % filename
)
2155 self
.report_file_delete(filename
)
2156 os
.remove(encodeFilename(filename
))
2157 for f
in requested_formats
:
2158 new_info
= dict(info_dict
)
2160 fname
= prepend_extension(
2161 self
.prepare_filename(new_info
),
2162 'f%s' % f
['format_id'], new_info
['ext'])
2163 if not ensure_dir_exists(fname
):
2165 downloaded
.append(fname
)
2166 partial_success
, real_download
= dl(fname
, new_info
)
2167 success
= success
and partial_success
2168 info_dict
['__postprocessors'] = postprocessors
2169 info_dict
['__files_to_merge'] = downloaded
2170 # Even if there were no downloads, it is being merged only now
2171 info_dict
['__real_download'] = True
2173 # Delete existing file with --yes-overwrites
2174 if self
.params
.get('overwrites', False):
2175 if os
.path
.exists(encodeFilename(filename
)):
2176 self
.report_file_delete(filename
)
2177 os
.remove(encodeFilename(filename
))
2178 # Just a single file
2179 success
, real_download
= dl(filename
, info_dict
)
2180 info_dict
['__real_download'] = real_download
2181 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2182 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2184 except (OSError, IOError) as err
:
2185 raise UnavailableVideoError(err
)
2186 except (ContentTooShortError
, ) as err
:
2187 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2190 if success
and filename
!= '-':
2192 fixup_policy
= self
.params
.get('fixup')
2193 if fixup_policy
is None:
2194 fixup_policy
= 'detect_or_warn'
2196 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg or avconv to fix this automatically.'
2198 stretched_ratio
= info_dict
.get('stretched_ratio')
2199 if stretched_ratio
is not None and stretched_ratio
!= 1:
2200 if fixup_policy
== 'warn':
2201 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2202 info_dict
['id'], stretched_ratio
))
2203 elif fixup_policy
== 'detect_or_warn':
2204 stretched_pp
= FFmpegFixupStretchedPP(self
)
2205 if stretched_pp
.available
:
2206 info_dict
.setdefault('__postprocessors', [])
2207 info_dict
['__postprocessors'].append(stretched_pp
)
2209 self
.report_warning(
2210 '%s: Non-uniform pixel ratio (%s). %s'
2211 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2213 assert fixup_policy
in ('ignore', 'never')
2215 if (info_dict
.get('requested_formats') is None
2216 and info_dict
.get('container') == 'm4a_dash'):
2217 if fixup_policy
== 'warn':
2218 self
.report_warning(
2219 '%s: writing DASH m4a. '
2220 'Only some players support this container.'
2222 elif fixup_policy
== 'detect_or_warn':
2223 fixup_pp
= FFmpegFixupM4aPP(self
)
2224 if fixup_pp
.available
:
2225 info_dict
.setdefault('__postprocessors', [])
2226 info_dict
['__postprocessors'].append(fixup_pp
)
2228 self
.report_warning(
2229 '%s: writing DASH m4a. '
2230 'Only some players support this container. %s'
2231 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2233 assert fixup_policy
in ('ignore', 'never')
2235 if (info_dict
.get('protocol') == 'm3u8_native'
2236 or info_dict
.get('protocol') == 'm3u8'
2237 and self
.params
.get('hls_prefer_native')):
2238 if fixup_policy
== 'warn':
2239 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2241 elif fixup_policy
== 'detect_or_warn':
2242 fixup_pp
= FFmpegFixupM3u8PP(self
)
2243 if fixup_pp
.available
:
2244 info_dict
.setdefault('__postprocessors', [])
2245 info_dict
['__postprocessors'].append(fixup_pp
)
2247 self
.report_warning(
2248 '%s: malformed AAC bitstream detected. %s'
2249 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2251 assert fixup_policy
in ('ignore', 'never')
2254 self
.post_process(filename
, info_dict
)
2255 except (PostProcessingError
) as err
:
2256 self
.report_error('postprocessing: %s' % str(err
))
2259 for ph
in self
._post
_hooks
:
2261 except Exception as err
:
2262 self
.report_error('post hooks: %s' % str(err
))
2264 must_record_download_archive
= True
2266 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2267 self
.record_download_archive(info_dict
)
2268 max_downloads
= self
.params
.get('max_downloads')
2269 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2270 raise MaxDownloadsReached()
2272 def download(self
, url_list
):
2273 """Download a given list of URLs."""
2274 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
2275 if (len(url_list
) > 1
2277 and '%' not in outtmpl
2278 and self
.params
.get('max_downloads') != 1):
2279 raise SameFileError(outtmpl
)
2281 for url
in url_list
:
2283 # It also downloads the videos
2284 res
= self
.extract_info(
2285 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
2286 except UnavailableVideoError
:
2287 self
.report_error('unable to download video')
2288 except MaxDownloadsReached
:
2289 self
.to_screen('[info] Maximum number of downloaded files reached')
2291 except ExistingVideoReached
:
2292 self
.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2294 except RejectedVideoReached
:
2295 self
.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2298 if self
.params
.get('dump_single_json', False):
2299 self
.to_stdout(json
.dumps(res
))
2301 return self
._download
_retcode
2303 def download_with_info_file(self
, info_filename
):
2304 with contextlib
.closing(fileinput
.FileInput(
2305 [info_filename
], mode
='r',
2306 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
2307 # FileInput doesn't have a read method, we can't call json.load
2308 info
= self
.filter_requested_info(json
.loads('\n'.join(f
)))
2310 self
.process_ie_result(info
, download
=True)
2311 except DownloadError
:
2312 webpage_url
= info
.get('webpage_url')
2313 if webpage_url
is not None:
2314 self
.report_warning('The info failed to download, trying with "%s"' % webpage_url
)
2315 return self
.download([webpage_url
])
2318 return self
._download
_retcode
2321 def filter_requested_info(info_dict
):
2323 (k
, v
) for k
, v
in info_dict
.items()
2324 if k
not in ['requested_formats', 'requested_subtitles'])
2326 def post_process(self
, filename
, ie_info
):
2327 """Run all the postprocessors on the given file."""
2328 info
= dict(ie_info
)
2329 info
['filepath'] = filename
2331 if ie_info
.get('__postprocessors') is not None:
2332 pps_chain
.extend(ie_info
['__postprocessors'])
2333 pps_chain
.extend(self
._pps
)
2334 for pp
in pps_chain
:
2335 files_to_delete
= []
2337 files_to_delete
, info
= pp
.run(info
)
2338 except PostProcessingError
as e
:
2339 self
.report_error(e
.msg
)
2340 if files_to_delete
and not self
.params
.get('keepvideo', False):
2341 for old_filename
in set(files_to_delete
):
2342 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
2344 os
.remove(encodeFilename(old_filename
))
2345 except (IOError, OSError):
2346 self
.report_warning('Unable to remove downloaded original file')
2348 def _make_archive_id(self
, info_dict
):
2349 video_id
= info_dict
.get('id')
2352 # Future-proof against any change in case
2353 # and backwards compatibility with prior versions
2354 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2355 if extractor
is None:
2356 url
= str_or_none(info_dict
.get('url'))
2359 # Try to find matching extractor for the URL and take its ie_key
2360 for ie
in self
._ies
:
2361 if ie
.suitable(url
):
2362 extractor
= ie
.ie_key()
2366 return extractor
.lower() + ' ' + video_id
2368 def in_download_archive(self
, info_dict
):
2369 fn
= self
.params
.get('download_archive')
2373 vid_id
= self
._make
_archive
_id
(info_dict
)
2375 return False # Incomplete video information
2377 return vid_id
in self
.archive
2379 def record_download_archive(self
, info_dict
):
2380 fn
= self
.params
.get('download_archive')
2383 vid_id
= self
._make
_archive
_id
(info_dict
)
2385 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
2386 archive_file
.write(vid_id
+ '\n')
2387 self
.archive
.add(vid_id
)
2390 def format_resolution(format
, default
='unknown'):
2391 if format
.get('vcodec') == 'none':
2393 if format
.get('resolution') is not None:
2394 return format
['resolution']
2395 if format
.get('height') is not None:
2396 if format
.get('width') is not None:
2397 res
= '%sx%s' % (format
['width'], format
['height'])
2399 res
= '%sp' % format
['height']
2400 elif format
.get('width') is not None:
2401 res
= '%dx?' % format
['width']
2406 def _format_note(self
, fdict
):
2408 if fdict
.get('ext') in ['f4f', 'f4m']:
2409 res
+= '(unsupported) '
2410 if fdict
.get('language'):
2413 res
+= '[%s] ' % fdict
['language']
2414 if fdict
.get('format_note') is not None:
2415 res
+= fdict
['format_note'] + ' '
2416 if fdict
.get('tbr') is not None:
2417 res
+= '%4dk ' % fdict
['tbr']
2418 if fdict
.get('container') is not None:
2421 res
+= '%s container' % fdict
['container']
2422 if (fdict
.get('vcodec') is not None
2423 and fdict
.get('vcodec') != 'none'):
2426 res
+= fdict
['vcodec']
2427 if fdict
.get('vbr') is not None:
2429 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
2431 if fdict
.get('vbr') is not None:
2432 res
+= '%4dk' % fdict
['vbr']
2433 if fdict
.get('fps') is not None:
2436 res
+= '%sfps' % fdict
['fps']
2437 if fdict
.get('acodec') is not None:
2440 if fdict
['acodec'] == 'none':
2443 res
+= '%-5s' % fdict
['acodec']
2444 elif fdict
.get('abr') is not None:
2448 if fdict
.get('abr') is not None:
2449 res
+= '@%3dk' % fdict
['abr']
2450 if fdict
.get('asr') is not None:
2451 res
+= ' (%5dHz)' % fdict
['asr']
2452 if fdict
.get('filesize') is not None:
2455 res
+= format_bytes(fdict
['filesize'])
2456 elif fdict
.get('filesize_approx') is not None:
2459 res
+= '~' + format_bytes(fdict
['filesize_approx'])
2462 def _format_note_table(self
, f
):
2463 def join_fields(*vargs
):
2464 return ', '.join((val
for val
in vargs
if val
!= ''))
2467 'UNSUPPORTED' if f
.get('ext') in ('f4f', 'f4m') else '',
2468 format_field(f
, 'language', '[%s]'),
2469 format_field(f
, 'format_note'),
2470 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
2471 format_field(f
, 'asr', '%5dHz'))
2473 def list_formats(self
, info_dict
):
2474 formats
= info_dict
.get('formats', [info_dict
])
2475 new_format
= self
.params
.get('listformats_table', False)
2479 format_field(f
, 'format_id'),
2480 format_field(f
, 'ext'),
2481 self
.format_resolution(f
),
2482 format_field(f
, 'fps', '%d'),
2484 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
2485 format_field(f
, 'tbr', '%4dk'),
2486 f
.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2488 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
2489 format_field(f
, 'vbr', '%4dk'),
2490 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
2491 format_field(f
, 'abr', '%3dk'),
2492 format_field(f
, 'asr', '%5dHz'),
2493 self
._format
_note
_table
(f
)]
2495 if f
.get('preference') is None or f
['preference'] >= -1000]
2496 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2497 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2501 format_field(f
, 'format_id'),
2502 format_field(f
, 'ext'),
2503 self
.format_resolution(f
),
2504 self
._format
_note
(f
)]
2506 if f
.get('preference') is None or f
['preference'] >= -1000]
2507 header_line
= ['format code', 'extension', 'resolution', 'note']
2509 # if len(formats) > 1:
2510 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2512 '[info] Available formats for %s:\n%s' % (info_dict
['id'], render_table(
2516 extraGap
=(0 if new_format
else 1),
2517 hideEmpty
=new_format
)))
2519 def list_thumbnails(self
, info_dict
):
2520 thumbnails
= info_dict
.get('thumbnails')
2522 self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id'])
2526 '[info] Thumbnails for %s:' % info_dict
['id'])
2527 self
.to_screen(render_table(
2528 ['ID', 'width', 'height', 'URL'],
2529 [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
]))
2531 def list_subtitles(self
, video_id
, subtitles
, name
='subtitles'):
2533 self
.to_screen('%s has no %s' % (video_id
, name
))
2536 'Available %s for %s:' % (name
, video_id
))
2537 self
.to_screen(render_table(
2538 ['Language', 'formats'],
2539 [[lang
, ', '.join(f
['ext'] for f
in reversed(formats
))]
2540 for lang
, formats
in subtitles
.items()]))
2542 def urlopen(self
, req
):
2543 """ Start an HTTP download """
2544 if isinstance(req
, compat_basestring
):
2545 req
= sanitized_Request(req
)
2546 return self
._opener
.open(req
, timeout
=self
._socket
_timeout
)
2548 def print_debug_header(self
):
2549 if not self
.params
.get('verbose'):
2552 if type('') is not compat_str
:
2553 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2554 self
.report_warning(
2555 'Your Python is broken! Update to a newer and supported version')
2557 stdout_encoding
= getattr(
2558 sys
.stdout
, 'encoding', 'missing (%s)' % type(sys
.stdout
).__name
__)
2560 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2561 locale
.getpreferredencoding(),
2562 sys
.getfilesystemencoding(),
2564 self
.get_encoding()))
2565 write_string(encoding_str
, encoding
=None)
2567 self
._write
_string
('[debug] yt-dlp version ' + __version__
+ '\n')
2569 self
._write
_string
('[debug] Lazy loading extractors enabled' + '\n')
2571 sp
= subprocess
.Popen(
2572 ['git', 'rev-parse', '--short', 'HEAD'],
2573 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
,
2574 cwd
=os
.path
.dirname(os
.path
.abspath(__file__
)))
2575 out
, err
= process_communicate_or_kill(sp
)
2576 out
= out
.decode().strip()
2577 if re
.match('[0-9a-f]+', out
):
2578 self
._write
_string
('[debug] Git HEAD: ' + out
+ '\n')
2585 def python_implementation():
2586 impl_name
= platform
.python_implementation()
2587 if impl_name
== 'PyPy' and hasattr(sys
, 'pypy_version_info'):
2588 return impl_name
+ ' version %d.%d.%d' % sys
.pypy_version_info
[:3]
2591 self
._write
_string
('[debug] Python version %s (%s) - %s\n' % (
2592 platform
.python_version(), python_implementation(),
2595 exe_versions
= FFmpegPostProcessor
.get_versions(self
)
2596 exe_versions
['rtmpdump'] = rtmpdump_version()
2597 exe_versions
['phantomjs'] = PhantomJSwrapper
._version
()
2598 exe_str
= ', '.join(
2600 for exe
, v
in sorted(exe_versions
.items())
2605 self
._write
_string
('[debug] exe versions: %s\n' % exe_str
)
2608 for handler
in self
._opener
.handlers
:
2609 if hasattr(handler
, 'proxies'):
2610 proxy_map
.update(handler
.proxies
)
2611 self
._write
_string
('[debug] Proxy map: ' + compat_str(proxy_map
) + '\n')
2613 if self
.params
.get('call_home', False):
2614 ipaddr
= self
.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2615 self
._write
_string
('[debug] Public IP address: %s\n' % ipaddr
)
2617 latest_version
= self
.urlopen(
2618 'https://yt-dl.org/latest/version').read().decode('utf-8')
2619 if version_tuple(latest_version
) > version_tuple(__version__
):
2620 self
.report_warning(
2621 'You are using an outdated version (newest version: %s)! '
2622 'See https://yt-dl.org/update if you need help updating.' %
2625 def _setup_opener(self
):
2626 timeout_val
= self
.params
.get('socket_timeout')
2627 self
._socket
_timeout
= 600 if timeout_val
is None else float(timeout_val
)
2629 opts_cookiefile
= self
.params
.get('cookiefile')
2630 opts_proxy
= self
.params
.get('proxy')
2632 if opts_cookiefile
is None:
2633 self
.cookiejar
= compat_cookiejar
.CookieJar()
2635 opts_cookiefile
= expand_path(opts_cookiefile
)
2636 self
.cookiejar
= YoutubeDLCookieJar(opts_cookiefile
)
2637 if os
.access(opts_cookiefile
, os
.R_OK
):
2638 self
.cookiejar
.load(ignore_discard
=True, ignore_expires
=True)
2640 cookie_processor
= YoutubeDLCookieProcessor(self
.cookiejar
)
2641 if opts_proxy
is not None:
2642 if opts_proxy
== '':
2645 proxies
= {'http': opts_proxy, 'https': opts_proxy}
2647 proxies
= compat_urllib_request
.getproxies()
2648 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2649 if 'http' in proxies
and 'https' not in proxies
:
2650 proxies
['https'] = proxies
['http']
2651 proxy_handler
= PerRequestProxyHandler(proxies
)
2653 debuglevel
= 1 if self
.params
.get('debug_printtraffic') else 0
2654 https_handler
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
)
2655 ydlh
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
)
2656 redirect_handler
= YoutubeDLRedirectHandler()
2657 data_handler
= compat_urllib_request_DataHandler()
2659 # When passing our own FileHandler instance, build_opener won't add the
2660 # default FileHandler and allows us to disable the file protocol, which
2661 # can be used for malicious purposes (see
2662 # https://github.com/ytdl-org/youtube-dl/issues/8227)
2663 file_handler
= compat_urllib_request
.FileHandler()
2665 def file_open(*args
, **kwargs
):
2666 raise compat_urllib_error
.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2667 file_handler
.file_open
= file_open
2669 opener
= compat_urllib_request
.build_opener(
2670 proxy_handler
, https_handler
, cookie_processor
, ydlh
, redirect_handler
, data_handler
, file_handler
)
2672 # Delete the default user-agent header, which would otherwise apply in
2673 # cases where our custom HTTP handler doesn't come into play
2674 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2675 opener
.addheaders
= []
2676 self
._opener
= opener
2678 def encode(self
, s
):
2679 if isinstance(s
, bytes):
2680 return s
# Already encoded
2683 return s
.encode(self
.get_encoding())
2684 except UnicodeEncodeError as err
:
2685 err
.reason
= err
.reason
+ '. Check your system encoding configuration or use the --encoding option.'
2688 def get_encoding(self
):
2689 encoding
= self
.params
.get('encoding')
2690 if encoding
is None:
2691 encoding
= preferredencoding()
2694 def _write_thumbnails(self
, info_dict
, filename
):
2695 if self
.params
.get('writethumbnail', False):
2696 thumbnails
= info_dict
.get('thumbnails')
2698 thumbnails
= [thumbnails
[-1]]
2699 elif self
.params
.get('write_all_thumbnails', False):
2700 thumbnails
= info_dict
.get('thumbnails')
2705 # No thumbnails present, so return immediately
2708 for t
in thumbnails
:
2709 thumb_ext
= determine_ext(t
['url'], 'jpg')
2710 suffix
= '_%s' % t
['id'] if len(thumbnails
) > 1 else ''
2711 thumb_display_id
= '%s ' % t
['id'] if len(thumbnails
) > 1 else ''
2712 t
['filename'] = thumb_filename
= replace_extension(filename
+ suffix
, thumb_ext
, info_dict
.get('ext'))
2714 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
2715 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
2716 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2718 self
.to_screen('[%s] %s: Downloading thumbnail %s...' %
2719 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2721 uf
= self
.urlopen(t
['url'])
2722 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
2723 shutil
.copyfileobj(uf
, thumbf
)
2724 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2725 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
2726 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2727 self
.report_warning('Unable to download thumbnail "%s": %s' %
2728 (t
['url'], error_to_compat_str(err
)))