4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
71 PerRequestProxyHandler
,
76 register_socks_protocols
,
87 UnavailableVideoError
,
93 YoutubeDLCookieProcessor
,
95 YoutubeDLRedirectHandler
,
97 from .cache
import Cache
98 from .extractor
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
99 from .extractor
.openload
import PhantomJSwrapper
100 from .downloader
import get_suitable_downloader
101 from .downloader
.rtmp
import rtmpdump_version
102 from .postprocessor
import (
105 FFmpegFixupStretchedPP
,
108 FFmpegSubtitlesConvertorPP
,
111 from .version
import __version__
113 if compat_os_name
== 'nt':
117 class YoutubeDL(object):
120 YoutubeDL objects are the ones responsible of downloading the
121 actual video file and writing it to disk if the user has requested
122 it, among some other tasks. In most cases there should be one per
123 program. As, given a video URL, the downloader doesn't know how to
124 extract all the needed information, a task that InfoExtractors do, it
125 has to pass the URL to one of them.
127 For this, YoutubeDL objects have a method that allows
128 InfoExtractors to be registered in a given order. When it is passed
129 a URL, the YoutubeDL object handles it to the first InfoExtractor it
130 finds that reports being able to handle it. The InfoExtractor extracts
131 all the information about the video or videos the URL refers to, and
132 YoutubeDL processes the extracted information, possibly using a File
133 Downloader to download the video.
135 YoutubeDL objects accept a lot of parameters. In order not to saturate
136 the object constructor with arguments, it receives a dictionary of
137 options instead. These options are available through the params
138 attribute for the InfoExtractors to use. The YoutubeDL also
139 registers itself as the downloader in charge for the InfoExtractors
140 that are added to it, so this is a "mutual registration".
144 username: Username for authentication purposes.
145 password: Password for authentication purposes.
146 videopassword: Password for accessing a video.
147 ap_mso: Adobe Pass multiple-system operator identifier.
148 ap_username: Multiple-system operator account username.
149 ap_password: Multiple-system operator account password.
150 usenetrc: Use netrc for authentication instead.
151 verbose: Print additional info to stdout.
152 quiet: Do not print messages to stdout.
153 no_warnings: Do not print out anything for warnings.
154 forceurl: Force printing final URL.
155 forcetitle: Force printing title.
156 forceid: Force printing ID.
157 forcethumbnail: Force printing thumbnail URL.
158 forcedescription: Force printing description.
159 forcefilename: Force printing final filename.
160 forceduration: Force printing duration.
161 forcejson: Force printing info_dict as JSON.
162 dump_single_json: Force printing the info_dict of the whole playlist
163 (or video) as a single JSON line.
164 simulate: Do not download the video files.
165 format: Video format code. See options.py for more information.
166 outtmpl: Template for output names.
167 restrictfilenames: Do not allow "&" and spaces in file names.
168 trim_file_name: Limit length of filename (extension excluded).
169 ignoreerrors: Do not stop on download errors.
170 force_generic_extractor: Force downloader to use the generic extractor
171 nooverwrites: Prevent overwriting files.
172 playliststart: Playlist item to start at.
173 playlistend: Playlist item to end at.
174 playlist_items: Specific indices of playlist to download.
175 playlistreverse: Download playlist items in reverse order.
176 playlistrandom: Download playlist items in random order.
177 matchtitle: Download only matching titles.
178 rejecttitle: Reject downloads for matching titles.
179 logger: Log messages to a logging.Logger instance.
180 logtostderr: Log messages to stderr instead of stdout.
181 writedescription: Write the video description to a .description file
182 writeinfojson: Write the video description to a .info.json file
183 writeannotations: Write the video annotations to a .annotations.xml file
184 writethumbnail: Write the thumbnail image to a file
185 write_all_thumbnails: Write all thumbnail formats to files
186 writesubtitles: Write the video subtitles to a file
187 writeautomaticsub: Write the automatically generated subtitles to a file
188 allsubtitles: Downloads all the subtitles of the video
189 (requires writesubtitles or writeautomaticsub)
190 listsubtitles: Lists all available subtitles for the video
191 subtitlesformat: The format code for subtitles
192 subtitleslangs: List of languages of the subtitles to download
193 keepvideo: Keep the video file after post-processing
194 daterange: A DateRange object, download only if the upload_date is in the range.
195 skip_download: Skip the actual download of the video file
196 cachedir: Location of the cache files in the filesystem.
197 False to disable filesystem cache.
198 noplaylist: Download single video instead of a playlist if in doubt.
199 age_limit: An integer representing the user's age in years.
200 Unsuitable videos for the given age are skipped.
201 min_views: An integer representing the minimum view count the video
202 must have in order to not be skipped.
203 Videos without view count information are always
204 downloaded. None for no limit.
205 max_views: An integer representing the maximum view count.
206 Videos that are more popular than that are not
208 Videos without view count information are always
209 downloaded. None for no limit.
210 download_archive: File name of a file where all downloads are recorded.
211 Videos already present in the file are not downloaded
213 cookiefile: File name where cookies should be read from and dumped to.
214 nocheckcertificate:Do not verify SSL certificates
215 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
216 At the moment, this is only supported by YouTube.
217 proxy: URL of the proxy server to use
218 geo_verification_proxy: URL of the proxy to use for IP address verification
219 on geo-restricted sites.
220 socket_timeout: Time to wait for unresponsive hosts, in seconds
221 bidi_workaround: Work around buggy terminals without bidirectional text
222 support, using fribidi
223 debug_printtraffic:Print out sent and received HTTP traffic
224 include_ads: Download ads as well
225 default_search: Prepend this string if an input url is not valid.
226 'auto' for elaborate guessing
227 encoding: Use this encoding instead of the system-specified.
228 extract_flat: Do not resolve URLs, return the immediate result.
229 Pass in 'in_playlist' to only show this behavior for
231 postprocessors: A list of dictionaries, each with an entry
232 * key: The name of the postprocessor. See
233 youtube_dlc/postprocessor/__init__.py for a list.
234 as well as any further keyword arguments for the
236 progress_hooks: A list of functions that get called on download
237 progress, with a dictionary with the entries
238 * status: One of "downloading", "error", or "finished".
239 Check this first and ignore unknown values.
241 If status is one of "downloading", or "finished", the
242 following properties may also be present:
243 * filename: The final filename (always present)
244 * tmpfilename: The filename we're currently writing to
245 * downloaded_bytes: Bytes on disk
246 * total_bytes: Size of the whole file, None if unknown
247 * total_bytes_estimate: Guess of the eventual file size,
249 * elapsed: The number of seconds since download started.
250 * eta: The estimated time in seconds, None if unknown
251 * speed: The download speed in bytes/second, None if
253 * fragment_index: The counter of the currently
254 downloaded video fragment.
255 * fragment_count: The number of fragments (= individual
256 files that will be merged)
258 Progress hooks are guaranteed to be called at least once
259 (with status "finished") if the download is successful.
260 merge_output_format: Extension to use when merging formats.
261 fixup: Automatically correct known faults of the file.
263 - "never": do nothing
264 - "warn": only emit a warning
265 - "detect_or_warn": check whether we can do anything
266 about it, warn otherwise (default)
267 source_address: Client-side IP address to bind to.
268 call_home: Boolean, true iff we are allowed to contact the
269 youtube-dlc servers for debugging.
270 sleep_interval: Number of seconds to sleep before each download when
271 used alone or a lower bound of a range for randomized
272 sleep before each download (minimum possible number
273 of seconds to sleep) when used along with
275 max_sleep_interval:Upper bound of a range for randomized sleep before each
276 download (maximum possible number of seconds to sleep).
277 Must only be used along with sleep_interval.
278 Actual sleep time will be a random float from range
279 [sleep_interval; max_sleep_interval].
280 listformats: Print an overview of available video formats and exit.
281 list_thumbnails: Print a table of all thumbnails and exit.
282 match_filter: A function that gets called with the info_dict of
284 If it returns a message, the video is ignored.
285 If it returns None, the video is downloaded.
286 match_filter_func in utils.py is one example for this.
287 no_color: Do not emit color codes in output.
288 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
291 Two-letter ISO 3166-2 country code that will be used for
292 explicit geographic restriction bypassing via faking
293 X-Forwarded-For HTTP header
295 IP range in CIDR notation that will be used similarly to
298 The following options determine which downloader is picked:
299 external_downloader: Executable of the external downloader to call.
300 None or unset for standard (built-in) downloader.
301 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
302 if True, otherwise use ffmpeg/avconv if False, otherwise
303 use downloader suggested by extractor if None.
305 The following parameters are not used by YoutubeDL itself, they are used by
306 the downloader (see youtube_dlc/downloader/common.py):
307 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
308 noresizebuffer, retries, continuedl, noprogress, consoletitle,
309 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
312 The following options are used by the post processors:
313 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
314 otherwise prefer ffmpeg.
315 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
316 to the binary or its containing directory.
317 postprocessor_args: A list of additional command-line arguments for the
320 The following options are used by the Youtube extractor:
321 youtube_include_dash_manifest: If True (default), DASH manifests and related
322 data will be downloaded and processed by extractor.
323 You can reduce network I/O by disabling it if you don't
327 _NUMERIC_FIELDS
= set((
328 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
329 'timestamp', 'upload_year', 'upload_month', 'upload_day',
330 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
331 'average_rating', 'comment_count', 'age_limit',
332 'start_time', 'end_time',
333 'chapter_number', 'season_number', 'episode_number',
334 'track_number', 'disc_number', 'release_year',
341 _download_retcode
= None
342 _num_downloads
= None
345 def __init__(self
, params
=None, auto_init
=True):
346 """Create a FileDownloader object with the given options."""
350 self
._ies
_instances
= {}
352 self
._progress
_hooks
= []
353 self
._download
_retcode
= 0
354 self
._num
_downloads
= 0
355 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
356 self
._err
_file
= sys
.stderr
359 'nocheckcertificate': False,
361 self
.params
.update(params
)
362 self
.cache
= Cache(self
)
365 """Preload the archive, if any is specified"""
366 def preload_download_archive(self
):
367 fn
= self
.params
.get('download_archive')
371 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
372 for line
in archive_file
:
373 self
.archive
.add(line
.strip())
374 except IOError as ioe
:
375 if ioe
.errno
!= errno
.ENOENT
:
380 def check_deprecated(param
, option
, suggestion
):
381 if self
.params
.get(param
) is not None:
383 '%s is deprecated. Use %s instead.' % (option
, suggestion
))
387 if self
.params
.get('verbose'):
388 self
.to_stdout('[debug] Loading archive file %r' % self
.params
.get('download_archive'))
390 preload_download_archive(self
)
392 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
393 if self
.params
.get('geo_verification_proxy') is None:
394 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
396 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
397 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
398 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
400 if params
.get('bidi_workaround', False):
403 master
, slave
= pty
.openpty()
404 width
= compat_get_terminal_size().columns
408 width_args
= ['-w', str(width
)]
410 stdin
=subprocess
.PIPE
,
412 stderr
=self
._err
_file
)
414 self
._output
_process
= subprocess
.Popen(
415 ['bidiv'] + width_args
, **sp_kwargs
418 self
._output
_process
= subprocess
.Popen(
419 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
420 self
._output
_channel
= os
.fdopen(master
, 'rb')
421 except OSError as ose
:
422 if ose
.errno
== errno
.ENOENT
:
423 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
427 if (sys
.platform
!= 'win32'
428 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
429 and not params
.get('restrictfilenames', False)):
430 # Unicode filesystem API will throw errors (#1474, #13027)
432 'Assuming --restrict-filenames since file system encoding '
433 'cannot encode all characters. '
434 'Set the LC_ALL environment variable to fix this.')
435 self
.params
['restrictfilenames'] = True
437 if isinstance(params
.get('outtmpl'), bytes):
439 'Parameter outtmpl is bytes, but should be a unicode string. '
440 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
445 self
.print_debug_header()
446 self
.add_default_info_extractors()
448 for pp_def_raw
in self
.params
.get('postprocessors', []):
449 pp_class
= get_postprocessor(pp_def_raw
['key'])
450 pp_def
= dict(pp_def_raw
)
452 pp
= pp_class(self
, **compat_kwargs(pp_def
))
453 self
.add_post_processor(pp
)
455 for ph
in self
.params
.get('progress_hooks', []):
456 self
.add_progress_hook(ph
)
458 register_socks_protocols()
460 def warn_if_short_id(self
, argv
):
461 # short YouTube ID starting with dash?
463 i
for i
, a
in enumerate(argv
)
464 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
468 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
469 + ['--'] + [argv
[i
] for i
in idxs
]
472 'Long argument string detected. '
473 'Use -- to separate parameters and URLs, like this:\n%s\n' %
474 args_to_str(correct_argv
))
476 def add_info_extractor(self
, ie
):
477 """Add an InfoExtractor object to the end of the list."""
479 if not isinstance(ie
, type):
480 self
._ies
_instances
[ie
.ie_key()] = ie
481 ie
.set_downloader(self
)
483 def get_info_extractor(self
, ie_key
):
485 Get an instance of an IE with name ie_key, it will try to get one from
486 the _ies list, if there's no instance it will create a new one and add
487 it to the extractor list.
489 ie
= self
._ies
_instances
.get(ie_key
)
491 ie
= get_info_extractor(ie_key
)()
492 self
.add_info_extractor(ie
)
def add_default_info_extractors(self):
    """Register every InfoExtractor class yielded by gen_extractor_classes(), in order."""
    for extractor in gen_extractor_classes():
        self.add_info_extractor(extractor)
502 def add_post_processor(self
, pp
):
503 """Add a PostProcessor object to the end of the chain."""
505 pp
.set_downloader(self
)
def add_progress_hook(self, ph):
    """Register a progress hook (currently only used by the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
511 def _bidi_workaround(self
, message
):
512 if not hasattr(self
, '_output_channel'):
515 assert hasattr(self
, '_output_process')
516 assert isinstance(message
, compat_str
)
517 line_count
= message
.count('\n') + 1
518 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
519 self
._output
_process
.stdin
.flush()
520 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
521 for _
in range(line_count
))
522 return res
[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Write message to the screen, honoring the 'quiet' option."""
    # Delegates to to_stdout with quiet-checking enabled.
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    # Delegate to the shared write_string helper, honoring the
    # user-selected output encoding (params['encoding']) if any.
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Write message to the screen file, or route it to the configured logger.

    When check_quiet is true and the 'quiet' option is set, nothing is
    written.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    text = self._bidi_workaround(message)
    terminator = '' if skip_eol else '\n'
    self._write_string(text + terminator, self._screen_file)
542 def to_stderr(self
, message
):
543 """Print message to stderr."""
544 assert isinstance(message
, compat_str
)
545 if self
.params
.get('logger'):
546 self
.params
['logger'].error(message
)
548 message
= self
._bidi
_workaround
(message
)
549 output
= message
+ '\n'
550 self
._write
_string
(output
, self
._err
_file
)
552 def to_console_title(self
, message
):
553 if not self
.params
.get('consoletitle', False):
555 if compat_os_name
== 'nt':
556 if ctypes
.windll
.kernel32
.GetConsoleWindow():
557 # c_wchar_p() might not be necessary if `message` is
558 # already of type unicode()
559 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
560 elif 'TERM' in os
.environ
:
561 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
563 def save_console_title(self
):
564 if not self
.params
.get('consoletitle', False):
566 if self
.params
.get('simulate', False):
568 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
569 # Save the title on stack
570 self
._write
_string
('\033[22;0t', self
._screen
_file
)
572 def restore_console_title(self
):
573 if not self
.params
.get('consoletitle', False):
575 if self
.params
.get('simulate', False):
577 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
578 # Restore the title from stack
579 self
._write
_string
('\033[23;0t', self
._screen
_file
)
582 self
.save_console_title()
def __exit__(self, *args):
    """Leave the runtime context: restore the console title and save cookies."""
    self.restore_console_title()
    # Persist cookies only when a cookie file was configured.
    cookiefile = self.params.get('cookiefile')
    if cookiefile is not None:
        self.cookiejar.save(ignore_discard=True, ignore_expires=True)
591 def trouble(self
, message
=None, tb
=None):
592 """Determine action to take when a download problem appears.
594 Depending on if the downloader has been configured to ignore
595 download errors or not, this method may throw an exception or
596 not when errors are found, after printing the message.
598 tb, if given, is additional traceback information.
600 if message
is not None:
601 self
.to_stderr(message
)
602 if self
.params
.get('verbose'):
604 if sys
.exc_info()[0]: # if .trouble has been called from an except block
606 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
607 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
608 tb
+= encode_compat_str(traceback
.format_exc())
610 tb_data
= traceback
.format_list(traceback
.extract_stack())
611 tb
= ''.join(tb_data
)
613 if not self
.params
.get('ignoreerrors', False):
614 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
615 exc_info
= sys
.exc_info()[1].exc_info
617 exc_info
= sys
.exc_info()
618 raise DownloadError(message
, exc_info
)
619 self
._download
_retcode
= 1
621 def report_warning(self
, message
):
623 Print the message to stderr, it will be prefixed with 'WARNING:'
624 If stderr is a tty file the 'WARNING:' will be colored
626 if self
.params
.get('logger') is not None:
627 self
.params
['logger'].warning(message
)
629 if self
.params
.get('no_warnings'):
631 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
632 _msg_header
= '\033[0;33mWARNING:\033[0m'
634 _msg_header
= 'WARNING:'
635 warning_message
= '%s %s' % (_msg_header
, message
)
636 self
.to_stderr(warning_message
)
638 def report_error(self
, message
, tb
=None):
640 Do the same as trouble, but prefixes the message with 'ERROR:', colored
641 in red if stderr is a tty file.
643 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
644 _msg_header
= '\033[0;31mERROR:\033[0m'
646 _msg_header
= 'ERROR:'
647 error_message
= '%s %s' % (_msg_header
, message
)
648 self
.trouble(error_message
, tb
)
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded."""
    try:
        msg = '[download] %s has already been downloaded' % file_name
        self.to_screen(msg)
    except UnicodeEncodeError:
        # file_name may not be representable in the console encoding;
        # fall back to a generic message.
        self.to_screen('[download] The file has already been downloaded')
657 def prepare_filename(self
, info_dict
):
658 """Generate the output filename."""
660 template_dict
= dict(info_dict
)
662 template_dict
['epoch'] = int(time
.time())
663 autonumber_size
= self
.params
.get('autonumber_size')
664 if autonumber_size
is None:
666 template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
667 if template_dict
.get('resolution') is None:
668 if template_dict
.get('width') and template_dict
.get('height'):
669 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
670 elif template_dict
.get('height'):
671 template_dict
['resolution'] = '%sp' % template_dict
['height']
672 elif template_dict
.get('width'):
673 template_dict
['resolution'] = '%dx?' % template_dict
['width']
675 sanitize
= lambda k
, v
: sanitize_filename(
677 restricted
=self
.params
.get('restrictfilenames'),
678 is_id
=(k
== 'id' or k
.endswith('_id')))
679 template_dict
= dict((k
, v
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
))
680 for k
, v
in template_dict
.items()
681 if v
is not None and not isinstance(v
, (list, tuple, dict)))
682 template_dict
= collections
.defaultdict(lambda: 'NA', template_dict
)
684 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
686 # For fields playlist_index and autonumber convert all occurrences
687 # of %(field)s to %(field)0Nd for backward compatibility
688 field_size_compat_map
= {
689 'playlist_index': len(str(template_dict
['n_entries'])),
690 'autonumber': autonumber_size
,
692 FIELD_SIZE_COMPAT_RE
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
693 mobj
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
)
696 FIELD_SIZE_COMPAT_RE
,
697 r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')],
700 # Missing numeric fields used together with integer presentation types
701 # in format specification will break the argument substitution since
702 # string 'NA' is returned for missing fields. We will patch output
703 # template for missing fields to meet string presentation type.
704 for numeric_field
in self
._NUMERIC
_FIELDS
:
705 if numeric_field
not in template_dict
:
706 # As of [1] format syntax is:
707 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
708 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
712 \({0}\) # mapping key
713 (?:[#0\-+ ]+)? # conversion flags (optional)
714 (?:\d+)? # minimum field width (optional)
715 (?:\.\d+)? # precision (optional)
716 [hlL]? # length modifier (optional)
717 [diouxXeEfFgGcrs%] # conversion type
720 FORMAT_RE
.format(numeric_field
),
721 r
'%({0})s'.format(numeric_field
), outtmpl
)
723 # expand_path translates '%%' into '%' and '$$' into '$'
724 # correspondingly that is not what we want since we need to keep
725 # '%%' intact for template dict substitution step. Working around
726 # with boundary-alike separator hack.
727 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
728 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
730 # outtmpl should be expand_path'ed before template dict substitution
731 # because meta fields may contain env variables we don't want to
732 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
733 # title "Hello $PATH", we don't want `$PATH` to be expanded.
734 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
736 # https://github.com/blackjack4494/youtube-dlc/issues/85
737 trim_file_name
= self
.params
.get('trim_file_name', False)
739 fn_groups
= filename
.rsplit('.')
742 if len(fn_groups
) > 2:
743 sub_ext
= fn_groups
[-2]
744 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
746 # Temporary fix for #4787
747 # 'Treat' all problem characters by passing filename through preferredencoding
748 # to workaround encoding issues with subprocess on python2 @ Windows
749 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
750 filename
= encodeFilename(filename
, True).decode(preferredencoding())
751 return sanitize_path(filename
)
752 except ValueError as err
:
753 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
756 def _match_entry(self
, info_dict
, incomplete
):
757 """ Returns None if the file should be downloaded """
759 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
760 if 'title' in info_dict
:
761 # This can happen when we're just evaluating the playlist
762 title
= info_dict
['title']
763 matchtitle
= self
.params
.get('matchtitle', False)
765 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
766 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
767 rejecttitle
= self
.params
.get('rejecttitle', False)
769 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
770 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
771 date
= info_dict
.get('upload_date')
773 dateRange
= self
.params
.get('daterange', DateRange())
774 if date
not in dateRange
:
775 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
776 view_count
= info_dict
.get('view_count')
777 if view_count
is not None:
778 min_views
= self
.params
.get('min_views')
779 if min_views
is not None and view_count
< min_views
:
780 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
781 max_views
= self
.params
.get('max_views')
782 if max_views
is not None and view_count
> max_views
:
783 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
784 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
785 return 'Skipping "%s" because it is age restricted' % video_title
786 if self
.in_download_archive(info_dict
):
787 return '%s has already been recorded in archive' % video_title
790 match_filter
= self
.params
.get('match_filter')
791 if match_filter
is not None:
792 ret
= match_filter(info_dict
)
def add_extra_info(info_dict, extra_info):
    """Copy each entry of extra_info into info_dict unless the key is already set."""
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
804 def extract_info(self
, url
, download
=True, ie_key
=None, info_dict
=None, extra_info
={},
805 process
=True, force_generic_extractor
=False):
807 Returns a list with a dictionary for each video we find.
808 If 'download', also downloads the videos.
809 extra_info is a dict containing the extra values to add to each result
812 if not ie_key
and force_generic_extractor
:
816 ies
= [self
.get_info_extractor(ie_key
)]
821 if not ie
.suitable(url
):
824 ie
= self
.get_info_extractor(ie
.ie_key())
826 self
.report_warning('The program functionality for this site has been marked as broken, '
827 'and will probably not work.')
830 ie_result
= ie
.extract(url
)
831 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
833 if isinstance(ie_result
, list):
834 # Backwards compatibility: old IE result format
836 '_type': 'compat_list',
837 'entries': ie_result
,
840 if info_dict
.get('id'):
841 ie_result
['id'] = info_dict
['id']
842 if info_dict
.get('title'):
843 ie_result
['title'] = info_dict
['title']
844 self
.add_default_extra_info(ie_result
, ie
, url
)
846 return self
.process_ie_result(ie_result
, download
, extra_info
)
849 except GeoRestrictedError
as e
:
852 msg
+= '\nThis video is available in %s.' % ', '.join(
853 map(ISO3166Utils
.short2full
, e
.countries
))
854 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
855 self
.report_error(msg
)
857 except ExtractorError
as e
: # An error we somewhat expected
858 self
.report_error(compat_str(e
), e
.format_traceback())
860 except MaxDownloadsReached
:
862 except Exception as e
:
863 if self
.params
.get('ignoreerrors', False):
864 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
869 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
871 def add_default_extra_info(self
, ie_result
, ie
, url
):
872 self
.add_extra_info(ie_result
, {
873 'extractor': ie
.IE_NAME
,
875 'webpage_url_basename': url_basename(url
),
876 'extractor_key': ie
.ie_key(),
879 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
881 Take the result of the ie(may be modified) and resolve all unresolved
882 references (URLs, playlist items).
884 It will also download the videos if 'download'.
885 Returns the resolved ie_result.
887 result_type
= ie_result
.get('_type', 'video')
889 if result_type
in ('url', 'url_transparent'):
890 ie_result
['url'] = sanitize_url(ie_result
['url'])
891 extract_flat
= self
.params
.get('extract_flat', False)
892 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
893 or extract_flat
is True):
894 self
.__forced
_printings
(
895 ie_result
, self
.prepare_filename(ie_result
),
899 if result_type
== 'video':
900 self
.add_extra_info(ie_result
, extra_info
)
901 return self
.process_video_result(ie_result
, download
=download
)
902 elif result_type
== 'url':
903 # We have to add extra_info to the results because it may be
904 # contained in a playlist
905 return self
.extract_info(ie_result
['url'],
906 download
, info_dict
=ie_result
,
907 ie_key
=ie_result
.get('ie_key'),
908 extra_info
=extra_info
)
909 elif result_type
== 'url_transparent':
910 # Use the information from the embedding page
911 info
= self
.extract_info(
912 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
913 extra_info
=extra_info
, download
=False, process
=False)
915 # extract_info may return None when ignoreerrors is enabled and
916 # extraction failed with an error, don't crash and return early
921 force_properties
= dict(
922 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
923 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
924 if f
in force_properties
:
925 del force_properties
[f
]
926 new_result
= info
.copy()
927 new_result
.update(force_properties
)
929 # Extracted info may not be a video result (i.e.
930 # info.get('_type', 'video') != video) but rather an url or
931 # url_transparent. In such cases outer metadata (from ie_result)
932 # should be propagated to inner one (info). For this to happen
933 # _type of info should be overridden with url_transparent. This
934 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
935 if new_result
.get('_type') == 'url':
936 new_result
['_type'] = 'url_transparent'
938 return self
.process_ie_result(
939 new_result
, download
=download
, extra_info
=extra_info
)
940 elif result_type
in ('playlist', 'multi_video'):
941 # We process each entry in the playlist
942 playlist
= ie_result
.get('title') or ie_result
.get('id')
943 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
945 playlist_results
= []
947 playliststart
= self
.params
.get('playliststart', 1) - 1
948 playlistend
= self
.params
.get('playlistend')
949 # For backwards compatibility, interpret -1 as whole list
950 if playlistend
== -1:
953 playlistitems_str
= self
.params
.get('playlist_items')
955 if playlistitems_str
is not None:
956 def iter_playlistitems(format
):
957 for string_segment
in format
.split(','):
958 if '-' in string_segment
:
959 start
, end
= string_segment
.split('-')
960 for item
in range(int(start
), int(end
) + 1):
963 yield int(string_segment
)
964 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
966 ie_entries
= ie_result
['entries']
968 def make_playlistitems_entries(list_ie_entries
):
969 num_entries
= len(list_ie_entries
)
971 list_ie_entries
[i
- 1] for i
in playlistitems
972 if -num_entries
<= i
- 1 < num_entries
]
974 def report_download(num_entries
):
976 '[%s] playlist %s: Downloading %d videos' %
977 (ie_result
['extractor'], playlist
, num_entries
))
979 if isinstance(ie_entries
, list):
980 n_all_entries
= len(ie_entries
)
982 entries
= make_playlistitems_entries(ie_entries
)
984 entries
= ie_entries
[playliststart
:playlistend
]
985 n_entries
= len(entries
)
987 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
988 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
989 elif isinstance(ie_entries
, PagedList
):
992 for item
in playlistitems
:
993 entries
.extend(ie_entries
.getslice(
997 entries
= ie_entries
.getslice(
998 playliststart
, playlistend
)
999 n_entries
= len(entries
)
1000 report_download(n_entries
)
1003 entries
= make_playlistitems_entries(list(itertools
.islice(
1004 ie_entries
, 0, max(playlistitems
))))
1006 entries
= list(itertools
.islice(
1007 ie_entries
, playliststart
, playlistend
))
1008 n_entries
= len(entries
)
1009 report_download(n_entries
)
1011 if self
.params
.get('playlistreverse', False):
1012 entries
= entries
[::-1]
1014 if self
.params
.get('playlistrandom', False):
1015 random
.shuffle(entries
)
1017 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1019 for i
, entry
in enumerate(entries
, 1):
1020 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1021 # This __x_forwarded_for_ip thing is a bit ugly but requires
1024 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1026 'n_entries': n_entries
,
1027 'playlist': playlist
,
1028 'playlist_id': ie_result
.get('id'),
1029 'playlist_title': ie_result
.get('title'),
1030 'playlist_uploader': ie_result
.get('uploader'),
1031 'playlist_uploader_id': ie_result
.get('uploader_id'),
1032 'playlist_index': playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
,
1033 'extractor': ie_result
['extractor'],
1034 'webpage_url': ie_result
['webpage_url'],
1035 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1036 'extractor_key': ie_result
['extractor_key'],
1039 reason
= self
._match
_entry
(entry
, incomplete
=True)
1040 if reason
is not None:
1041 self
.to_screen('[download] ' + reason
)
1044 entry_result
= self
.process_ie_result(entry
,
1047 playlist_results
.append(entry_result
)
1048 ie_result
['entries'] = playlist_results
1049 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
1051 elif result_type
== 'compat_list':
1052 self
.report_warning(
1053 'Extractor %s returned a compat_list result. '
1054 'It needs to be updated.' % ie_result
.get('extractor'))
1057 self
.add_extra_info(
1060 'extractor': ie_result
['extractor'],
1061 'webpage_url': ie_result
['webpage_url'],
1062 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1063 'extractor_key': ie_result
['extractor_key'],
1067 ie_result
['entries'] = [
1068 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1069 for r
in ie_result
['entries']
1073 raise Exception('Invalid result type: %s' % result_type
)
1075 def _build_format_filter(self
, filter_spec
):
1076 " Returns a function to filter the formats according to the filter_spec "
1086 operator_rex
= re
.compile(r
'''(?x)\s*
1087 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1088 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1089 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1091 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1092 m
= operator_rex
.search(filter_spec
)
1095 comparison_value
= int(m
.group('value'))
1097 comparison_value
= parse_filesize(m
.group('value'))
1098 if comparison_value
is None:
1099 comparison_value
= parse_filesize(m
.group('value') + 'B')
1100 if comparison_value
is None:
1102 'Invalid value %r in format specification %r' % (
1103 m
.group('value'), filter_spec
))
1104 op
= OPERATORS
[m
.group('op')]
1109 '^=': lambda attr
, value
: attr
.startswith(value
),
1110 '$=': lambda attr
, value
: attr
.endswith(value
),
1111 '*=': lambda attr
, value
: value
in attr
,
1113 str_operator_rex
= re
.compile(r
'''(?x)
1114 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1115 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1116 \s*(?P<value>[a-zA-Z0-9._-]+)
1118 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1119 m
= str_operator_rex
.search(filter_spec
)
1121 comparison_value
= m
.group('value')
1122 str_op
= STR_OPERATORS
[m
.group('op')]
1123 if m
.group('negation'):
1124 op
= lambda attr
, value
: not str_op(attr
, value
)
1129 raise ValueError('Invalid filter specification %r' % filter_spec
)
1132 actual_value
= f
.get(m
.group('key'))
1133 if actual_value
is None:
1134 return m
.group('none_inclusive')
1135 return op(actual_value
, comparison_value
)
1138 def _default_format_spec(self
, info_dict
, download
=True):
1141 merger
= FFmpegMergerPP(self
)
1142 return merger
.available
and merger
.can_merge()
1145 if self
.params
.get('simulate', False):
1149 if self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) == '-':
1151 if info_dict
.get('is_live'):
1157 req_format_list
= ['bestvideo+bestaudio', 'best']
1159 req_format_list
.reverse()
1160 return '/'.join(req_format_list
)
1162 def build_format_selector(self
, format_spec
):
1163 def syntax_error(note
, start
):
1165 'Invalid format specification: '
1166 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1167 return SyntaxError(message
)
1169 PICKFIRST
= 'PICKFIRST'
1173 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1175 def _parse_filter(tokens
):
1177 for type, string
, start
, _
, _
in tokens
:
1178 if type == tokenize
.OP
and string
== ']':
1179 return ''.join(filter_parts
)
1181 filter_parts
.append(string
)
1183 def _remove_unused_ops(tokens
):
1184 # Remove operators that we don't use and join them with the surrounding strings
1185 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1186 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1187 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1188 for type, string
, start
, end
, line
in tokens
:
1189 if type == tokenize
.OP
and string
== '[':
1191 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1193 yield type, string
, start
, end
, line
1194 # everything inside brackets will be handled by _parse_filter
1195 for type, string
, start
, end
, line
in tokens
:
1196 yield type, string
, start
, end
, line
1197 if type == tokenize
.OP
and string
== ']':
1199 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1201 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1203 yield type, string
, start
, end
, line
1204 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1206 last_string
= string
1210 last_string
+= string
1212 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1214 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1216 current_selector
= None
1217 for type, string
, start
, _
, _
in tokens
:
1218 # ENCODING is only defined in python 3.x
1219 if type == getattr(tokenize
, 'ENCODING', None):
1221 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1222 current_selector
= FormatSelector(SINGLE
, string
, [])
1223 elif type == tokenize
.OP
:
1225 if not inside_group
:
1226 # ')' will be handled by the parentheses group
1227 tokens
.restore_last_token()
1229 elif inside_merge
and string
in ['/', ',']:
1230 tokens
.restore_last_token()
1232 elif inside_choice
and string
== ',':
1233 tokens
.restore_last_token()
1236 if not current_selector
:
1237 raise syntax_error('"," must follow a format selector', start
)
1238 selectors
.append(current_selector
)
1239 current_selector
= None
1241 if not current_selector
:
1242 raise syntax_error('"/" must follow a format selector', start
)
1243 first_choice
= current_selector
1244 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1245 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1247 if not current_selector
:
1248 current_selector
= FormatSelector(SINGLE
, 'best', [])
1249 format_filter
= _parse_filter(tokens
)
1250 current_selector
.filters
.append(format_filter
)
1252 if current_selector
:
1253 raise syntax_error('Unexpected "("', start
)
1254 group
= _parse_format_selection(tokens
, inside_group
=True)
1255 current_selector
= FormatSelector(GROUP
, group
, [])
1257 if not current_selector
:
1258 raise syntax_error('Unexpected "+"', start
)
1259 selector_1
= current_selector
1260 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1262 raise syntax_error('Expected a selector', start
)
1263 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1265 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1266 elif type == tokenize
.ENDMARKER
:
1268 if current_selector
:
1269 selectors
.append(current_selector
)
1272 def _build_selector_function(selector
):
1273 if isinstance(selector
, list):
1274 fs
= [_build_selector_function(s
) for s
in selector
]
1276 def selector_function(ctx
):
1278 for format
in f(ctx
):
1280 return selector_function
1281 elif selector
.type == GROUP
:
1282 selector_function
= _build_selector_function(selector
.selector
)
1283 elif selector
.type == PICKFIRST
:
1284 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1286 def selector_function(ctx
):
1288 picked_formats
= list(f(ctx
))
1290 return picked_formats
1292 elif selector
.type == SINGLE
:
1293 format_spec
= selector
.selector
1295 def selector_function(ctx
):
1296 formats
= list(ctx
['formats'])
1299 if format_spec
== 'all':
1302 elif format_spec
in ['best', 'worst', None]:
1303 format_idx
= 0 if format_spec
== 'worst' else -1
1304 audiovideo_formats
= [
1306 if f
.get('vcodec') != 'none' and f
.get('acodec') != 'none']
1307 if audiovideo_formats
:
1308 yield audiovideo_formats
[format_idx
]
1309 # for extractors with incomplete formats (audio only (soundcloud)
1310 # or video only (imgur)) we will fallback to best/worst
1311 # {video,audio}-only format
1312 elif ctx
['incomplete_formats']:
1313 yield formats
[format_idx
]
1314 elif format_spec
== 'bestaudio':
1317 if f
.get('vcodec') == 'none']
1319 yield audio_formats
[-1]
1320 elif format_spec
== 'worstaudio':
1323 if f
.get('vcodec') == 'none']
1325 yield audio_formats
[0]
1326 elif format_spec
== 'bestvideo':
1329 if f
.get('acodec') == 'none']
1331 yield video_formats
[-1]
1332 elif format_spec
== 'worstvideo':
1335 if f
.get('acodec') == 'none']
1337 yield video_formats
[0]
1339 extensions
= ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1340 if format_spec
in extensions
:
1341 filter_f
= lambda f
: f
['ext'] == format_spec
1343 filter_f
= lambda f
: f
['format_id'] == format_spec
1344 matches
= list(filter(filter_f
, formats
))
1347 elif selector
.type == MERGE
:
1348 def _merge(formats_pair
):
1349 format_1
, format_2
= formats_pair
1352 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1353 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1355 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1356 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1358 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1359 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1361 output_ext
= self
.params
.get('merge_output_format')
1364 output_ext
= the_only_video
['ext']
1365 elif the_only_audio
and not video_fmts
:
1366 output_ext
= the_only_audio
['ext']
1371 'requested_formats': formats_info
,
1372 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1373 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1379 'width': the_only_video
.get('width'),
1380 'height': the_only_video
.get('height'),
1381 'resolution': the_only_video
.get('resolution'),
1382 'fps': the_only_video
.get('fps'),
1383 'vcodec': the_only_video
.get('vcodec'),
1384 'vbr': the_only_video
.get('vbr'),
1385 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1390 'acodec': the_only_audio
.get('acodec'),
1391 'abr': the_only_audio
.get('abr'),
1396 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1398 def selector_function(ctx
):
1399 for pair
in itertools
.product(
1400 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1403 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1405 def final_selector(ctx
):
1406 ctx_copy
= copy
.deepcopy(ctx
)
1407 for _filter
in filters
:
1408 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
1409 return selector_function(ctx_copy
)
1410 return final_selector
1412 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1414 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1415 except tokenize
.TokenError
:
1416 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1418 class TokenIterator(object):
1419 def __init__(self
, tokens
):
1420 self
.tokens
= tokens
1427 if self
.counter
>= len(self
.tokens
):
1428 raise StopIteration()
1429 value
= self
.tokens
[self
.counter
]
1435 def restore_last_token(self
):
1438 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1439 return _build_selector_function(parsed_selector
)
1441 def _calc_headers(self
, info_dict
):
1442 res
= std_headers
.copy()
1444 add_headers
= info_dict
.get('http_headers')
1446 res
.update(add_headers
)
1448 cookies
= self
._calc
_cookies
(info_dict
)
1450 res
['Cookie'] = cookies
1452 if 'X-Forwarded-For' not in res
:
1453 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
1454 if x_forwarded_for_ip
:
1455 res
['X-Forwarded-For'] = x_forwarded_for_ip
1459 def _calc_cookies(self
, info_dict
):
1460 pr
= sanitized_Request(info_dict
['url'])
1461 self
.cookiejar
.add_cookie_header(pr
)
1462 return pr
.get_header('Cookie')
1464 def process_video_result(self
, info_dict
, download
=True):
1465 assert info_dict
.get('_type', 'video') == 'video'
1467 if 'id' not in info_dict
:
1468 raise ExtractorError('Missing "id" field in extractor result')
1469 if 'title' not in info_dict
:
1470 raise ExtractorError('Missing "title" field in extractor result')
1472 def report_force_conversion(field
, field_not
, conversion
):
1473 self
.report_warning(
1474 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1475 % (field
, field_not
, conversion
))
1477 def sanitize_string_field(info
, string_field
):
1478 field
= info
.get(string_field
)
1479 if field
is None or isinstance(field
, compat_str
):
1481 report_force_conversion(string_field
, 'a string', 'string')
1482 info
[string_field
] = compat_str(field
)
1484 def sanitize_numeric_fields(info
):
1485 for numeric_field
in self
._NUMERIC
_FIELDS
:
1486 field
= info
.get(numeric_field
)
1487 if field
is None or isinstance(field
, compat_numeric_types
):
1489 report_force_conversion(numeric_field
, 'numeric', 'int')
1490 info
[numeric_field
] = int_or_none(field
)
1492 sanitize_string_field(info_dict
, 'id')
1493 sanitize_numeric_fields(info_dict
)
1495 if 'playlist' not in info_dict
:
1496 # It isn't part of a playlist
1497 info_dict
['playlist'] = None
1498 info_dict
['playlist_index'] = None
1500 thumbnails
= info_dict
.get('thumbnails')
1501 if thumbnails
is None:
1502 thumbnail
= info_dict
.get('thumbnail')
1504 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1506 thumbnails
.sort(key
=lambda t
: (
1507 t
.get('preference') if t
.get('preference') is not None else -1,
1508 t
.get('width') if t
.get('width') is not None else -1,
1509 t
.get('height') if t
.get('height') is not None else -1,
1510 t
.get('id') if t
.get('id') is not None else '', t
.get('url')))
1511 for i
, t
in enumerate(thumbnails
):
1512 t
['url'] = sanitize_url(t
['url'])
1513 if t
.get('width') and t
.get('height'):
1514 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1515 if t
.get('id') is None:
1518 if self
.params
.get('list_thumbnails'):
1519 self
.list_thumbnails(info_dict
)
1522 thumbnail
= info_dict
.get('thumbnail')
1524 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1526 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1528 if 'display_id' not in info_dict
and 'id' in info_dict
:
1529 info_dict
['display_id'] = info_dict
['id']
1531 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1532 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1533 # see http://bugs.python.org/issue1646728)
1535 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1536 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1537 except (ValueError, OverflowError, OSError):
1540 # Auto generate title fields corresponding to the *_number fields when missing
1541 # in order to always have clean titles. This is very common for TV series.
1542 for field
in ('chapter', 'season', 'episode'):
1543 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
1544 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
1546 for cc_kind
in ('subtitles', 'automatic_captions'):
1547 cc
= info_dict
.get(cc_kind
)
1549 for _
, subtitle
in cc
.items():
1550 for subtitle_format
in subtitle
:
1551 if subtitle_format
.get('url'):
1552 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
1553 if subtitle_format
.get('ext') is None:
1554 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1556 automatic_captions
= info_dict
.get('automatic_captions')
1557 subtitles
= info_dict
.get('subtitles')
1559 if self
.params
.get('listsubtitles', False):
1560 if 'automatic_captions' in info_dict
:
1561 self
.list_subtitles(
1562 info_dict
['id'], automatic_captions
, 'automatic captions')
1563 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1566 info_dict
['requested_subtitles'] = self
.process_subtitles(
1567 info_dict
['id'], subtitles
, automatic_captions
)
1569 # We now pick which formats have to be downloaded
1570 if info_dict
.get('formats') is None:
1571 # There's only one format available
1572 formats
= [info_dict
]
1574 formats
= info_dict
['formats']
1577 raise ExtractorError('No video formats found!')
1579 def is_wellformed(f
):
1582 self
.report_warning(
1583 '"url" field is missing or empty - skipping format, '
1584 'there is an error in extractor')
1586 if isinstance(url
, bytes):
1587 sanitize_string_field(f
, 'url')
1590 # Filter out malformed formats for better extraction robustness
1591 formats
= list(filter(is_wellformed
, formats
))
1595 # We check that all the formats have the format and format_id fields
1596 for i
, format
in enumerate(formats
):
1597 sanitize_string_field(format
, 'format_id')
1598 sanitize_numeric_fields(format
)
1599 format
['url'] = sanitize_url(format
['url'])
1600 if not format
.get('format_id'):
1601 format
['format_id'] = compat_str(i
)
1603 # Sanitize format_id from characters used in format selector expression
1604 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
1605 format_id
= format
['format_id']
1606 if format_id
not in formats_dict
:
1607 formats_dict
[format_id
] = []
1608 formats_dict
[format_id
].append(format
)
1610 # Make sure all formats have unique format_id
1611 for format_id
, ambiguous_formats
in formats_dict
.items():
1612 if len(ambiguous_formats
) > 1:
1613 for i
, format
in enumerate(ambiguous_formats
):
1614 format
['format_id'] = '%s-%d' % (format_id
, i
)
1616 for i
, format
in enumerate(formats
):
1617 if format
.get('format') is None:
1618 format
['format'] = '{id} - {res}{note}'.format(
1619 id=format
['format_id'],
1620 res
=self
.format_resolution(format
),
1621 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1623 # Automatically determine file extension if missing
1624 if format
.get('ext') is None:
1625 format
['ext'] = determine_ext(format
['url']).lower()
1626 # Automatically determine protocol if missing (useful for format
1627 # selection purposes)
1628 if format
.get('protocol') is None:
1629 format
['protocol'] = determine_protocol(format
)
1630 # Add HTTP headers, so that external programs can use them from the
1632 full_format_info
= info_dict
.copy()
1633 full_format_info
.update(format
)
1634 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1635 # Remove private housekeeping stuff
1636 if '__x_forwarded_for_ip' in info_dict
:
1637 del info_dict
['__x_forwarded_for_ip']
1639 # TODO Central sorting goes here
1641 if formats
[0] is not info_dict
:
1642 # only set the 'formats' fields if the original info_dict list them
1643 # otherwise we end up with a circular reference, the first (and unique)
1644 # element in the 'formats' field in info_dict is info_dict itself,
1645 # which can't be exported to json
1646 info_dict
['formats'] = formats
1647 if self
.params
.get('listformats'):
1648 self
.list_formats(info_dict
)
1651 req_format
= self
.params
.get('format')
1652 if req_format
is None:
1653 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
1654 if self
.params
.get('verbose'):
1655 self
.to_stdout('[debug] Default format spec: %s' % req_format
)
1657 format_selector
= self
.build_format_selector(req_format
)
1659 # While in format selection we may need to have an access to the original
1660 # format set in order to calculate some metrics or do some processing.
1661 # For now we need to be able to guess whether original formats provided
1662 # by extractor are incomplete or not (i.e. whether extractor provides only
1663 # video-only or audio-only formats) for proper formats selection for
1664 # extractors with such incomplete formats (see
1665 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1666 # Since formats may be filtered during format selection and may not match
1667 # the original formats the results may be incorrect. Thus original formats
1668 # or pre-calculated metrics should be passed to format selection routines
1670 # We will pass a context object containing all necessary additional data
1671 # instead of just formats.
1672 # This fixes incorrect format selection issue (see
1673 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1674 incomplete_formats
= (
1675 # All formats are video-only or
1676 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
1677 # all formats are audio-only
1678 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
1682 'incomplete_formats': incomplete_formats
,
1685 formats_to_download
= list(format_selector(ctx
))
1686 if not formats_to_download
:
1687 raise ExtractorError('requested format not available',
1691 if len(formats_to_download
) > 1:
1692 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1693 for format
in formats_to_download
:
1694 new_info
= dict(info_dict
)
1695 new_info
.update(format
)
1696 self
.process_info(new_info
)
1697 # We update the info dict with the best quality format (backwards compatibility)
1698 info_dict
.update(formats_to_download
[-1])
1701 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
1702 """Select the requested subtitles and their format"""
1704 if normal_subtitles
and self
.params
.get('writesubtitles'):
1705 available_subs
.update(normal_subtitles
)
1706 if automatic_captions
and self
.params
.get('writeautomaticsub'):
1707 for lang
, cap_info
in automatic_captions
.items():
1708 if lang
not in available_subs
:
1709 available_subs
[lang
] = cap_info
1711 if (not self
.params
.get('writesubtitles') and not
1712 self
.params
.get('writeautomaticsub') or not
1716 if self
.params
.get('allsubtitles', False):
1717 requested_langs
= available_subs
.keys()
1719 if self
.params
.get('subtitleslangs', False):
1720 requested_langs
= self
.params
.get('subtitleslangs')
1721 elif 'en' in available_subs
:
1722 requested_langs
= ['en']
1724 requested_langs
= [list(available_subs
.keys())[0]]
1726 formats_query
= self
.params
.get('subtitlesformat', 'best')
1727 formats_preference
= formats_query
.split('/') if formats_query
else []
1729 for lang
in requested_langs
:
1730 formats
= available_subs
.get(lang
)
1732 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
1734 for ext
in formats_preference
:
1738 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
1744 self
.report_warning(
1745 'No subtitle format found matching "%s" for language %s, '
1746 'using %s' % (formats_query
, lang
, f
['ext']))
    def __forced_printings(self, info_dict, filename, incomplete):
        # Print to stdout the fields requested via the 'force*' params
        # (forcetitle, forceid, forceurl, ...).
        # `incomplete` marks a flat/unresolved result, in which case even
        # normally-mandatory fields may legitimately be missing and URL
        # printing is skipped — presumably because no format was resolved;
        # TODO confirm against callers.

        def print_mandatory(field):
            # Printed even for incomplete results, as long as a value exists.
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(field) is not None)):
                self.to_stdout(info_dict[field])

        def print_optional(field):
            # Printed only when the value is actually present.
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        print_mandatory('title')
        print_mandatory('id')
        if self.params.get('forceurl', False) and not incomplete:
            if info_dict.get('requested_formats') is not None:
                # Merged download: print one URL per requested format.
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            # NOTE(review): 'else:' restored here — the extraction that
            # produced this chunk dropped the original line carrying it.
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        print_optional('thumbnail')
        print_optional('description')
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')
        if self.params.get('forcejson', False):
            # Dump the whole (resolved) info dict as a single JSON line.
            self.to_stdout(json.dumps(info_dict))
1780 def process_info(self
, info_dict
):
1781 """Process a single resolved IE result."""
1783 assert info_dict
.get('_type', 'video') == 'video'
1785 max_downloads
= self
.params
.get('max_downloads')
1786 if max_downloads
is not None:
1787 if self
._num
_downloads
>= int(max_downloads
):
1788 raise MaxDownloadsReached()
1790 # TODO: backward compatibility, to be removed
1791 info_dict
['fulltitle'] = info_dict
['title']
1793 if 'format' not in info_dict
:
1794 info_dict
['format'] = info_dict
['ext']
1796 reason
= self
._match
_entry
(info_dict
, incomplete
=False)
1797 if reason
is not None:
1798 self
.to_screen('[download] ' + reason
)
1801 self
._num
_downloads
+= 1
1803 info_dict
['_filename'] = filename
= self
.prepare_filename(info_dict
)
1806 self
.__forced
_printings
(info_dict
, filename
, incomplete
=False)
1808 # Do nothing else if in simulate mode
1809 if self
.params
.get('simulate', False):
1812 if filename
is None:
1815 def ensure_dir_exists(path
):
1817 dn
= os
.path
.dirname(path
)
1818 if dn
and not os
.path
.exists(dn
):
1821 except (OSError, IOError) as err
:
1822 self
.report_error('unable to create directory ' + error_to_compat_str(err
))
1825 if not ensure_dir_exists(sanitize_path(encodeFilename(filename
))):
1828 if self
.params
.get('writedescription', False):
1829 descfn
= replace_extension(filename
, 'description', info_dict
.get('ext'))
1830 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(descfn
)):
1831 self
.to_screen('[info] Video description is already present')
1832 elif info_dict
.get('description') is None:
1833 self
.report_warning('There\'s no description to write.')
1836 self
.to_screen('[info] Writing video description to: ' + descfn
)
1837 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1838 descfile
.write(info_dict
['description'])
1839 except (OSError, IOError):
1840 self
.report_error('Cannot write description file ' + descfn
)
1843 if self
.params
.get('writeannotations', False):
1844 annofn
= replace_extension(filename
, 'annotations.xml', info_dict
.get('ext'))
1845 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(annofn
)):
1846 self
.to_screen('[info] Video annotations are already present')
1847 elif not info_dict
.get('annotations'):
1848 self
.report_warning('There are no annotations to write.')
1851 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
1852 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
1853 annofile
.write(info_dict
['annotations'])
1854 except (KeyError, TypeError):
1855 self
.report_warning('There are no annotations to write.')
1856 except (OSError, IOError):
1857 self
.report_error('Cannot write annotations file: ' + annofn
)
1860 def dl(name
, info
, subtitle
=False):
1861 fd
= get_suitable_downloader(info
, self
.params
)(self
, self
.params
)
1862 for ph
in self
._progress
_hooks
:
1863 fd
.add_progress_hook(ph
)
1864 if self
.params
.get('verbose'):
1865 self
.to_stdout('[debug] Invoking downloader on %r' % info
.get('url'))
1866 return fd
.download(name
, info
, subtitle
)
1868 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
1869 self
.params
.get('writeautomaticsub')])
1871 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
1872 # subtitles download errors are already managed as troubles in relevant IE
1873 # that way it will silently go on when used with unsupporting IE
1874 subtitles
= info_dict
['requested_subtitles']
1875 # ie = self.get_info_extractor(info_dict['extractor_key'])
1876 for sub_lang
, sub_info
in subtitles
.items():
1877 sub_format
= sub_info
['ext']
1878 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
1879 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(sub_filename
)):
1880 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
1882 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
1883 if sub_info
.get('data') is not None:
1885 # Use newline='' to prevent conversion of newline characters
1886 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1887 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
1888 subfile
.write(sub_info
['data'])
1889 except (OSError, IOError):
1890 self
.report_error('Cannot write subtitles file ' + sub_filename
)
1894 dl(sub_filename
, sub_info
, subtitle
=True)
1896 if self.params.get('sleep_interval_subtitles', False):
1897 dl(sub_filename, sub_info)
1899 sub_data = ie._request_webpage(
1900 sub_info['url'], info_dict['id'], note=False).read()
1901 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1902 subfile.write(sub_data)
1904 except (ExtractorError
, IOError, OSError, ValueError, compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
1905 self
.report_warning('Unable to download subtitle for "%s": %s' %
1906 (sub_lang
, error_to_compat_str(err
)))
1909 if self
.params
.get('skip_download', False):
1910 if self
.params
.get('convertsubtitles', False):
1911 subconv
= FFmpegSubtitlesConvertorPP(self
, format
=self
.params
.get('convertsubtitles'))
1912 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
1914 os
.path
.splitext(filename
)[0]
1915 if filename_real_ext
== info_dict
['ext']
1917 afilename
= '%s.%s' % (filename_wo_ext
, self
.params
.get('convertsubtitles'))
1918 if subconv
.available
:
1919 info_dict
.setdefault('__postprocessors', [])
1920 # info_dict['__postprocessors'].append(subconv)
1921 if os
.path
.exists(encodeFilename(afilename
)):
1923 '[download] %s has already been downloaded and '
1924 'converted' % afilename
)
1927 self
.post_process(filename
, info_dict
)
1928 except (PostProcessingError
) as err
:
1929 self
.report_error('postprocessing: %s' % str(err
))
1932 if self
.params
.get('writeinfojson', False):
1933 infofn
= replace_extension(filename
, 'info.json', info_dict
.get('ext'))
1934 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(infofn
)):
1935 self
.to_screen('[info] Video description metadata is already present')
1937 self
.to_screen('[info] Writing video description metadata as JSON to: ' + infofn
)
1939 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
1940 except (OSError, IOError):
1941 self
.report_error('Cannot write metadata to JSON file ' + infofn
)
1944 self
._write
_thumbnails
(info_dict
, filename
)
1946 if not self
.params
.get('skip_download', False):
1948 if info_dict
.get('requested_formats') is not None:
1951 merger
= FFmpegMergerPP(self
)
1952 if not merger
.available
:
1954 self
.report_warning('You have requested multiple '
1955 'formats but ffmpeg or avconv are not installed.'
1956 ' The formats won\'t be merged.')
1958 postprocessors
= [merger
]
1960 def compatible_formats(formats
):
1961 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
1962 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
1963 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
1964 if len(video_formats
) > 2 or len(audio_formats
) > 2:
1968 exts
= set(format
.get('ext') for format
in formats
)
1970 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
1973 for ext_sets
in COMPATIBLE_EXTS
:
1974 if ext_sets
.issuperset(exts
):
1976 # TODO: Check acodec/vcodec
1979 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
1981 os
.path
.splitext(filename
)[0]
1982 if filename_real_ext
== info_dict
['ext']
1984 requested_formats
= info_dict
['requested_formats']
1985 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
1986 info_dict
['ext'] = 'mkv'
1987 self
.report_warning(
1988 'Requested formats are incompatible for merge and will be merged into mkv.')
1989 # Ensure filename always has a correct extension for successful merge
1990 filename
= '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
1991 if os
.path
.exists(encodeFilename(filename
)):
1993 '[download] %s has already been downloaded and '
1994 'merged' % filename
)
1996 for f
in requested_formats
:
1997 new_info
= dict(info_dict
)
1999 fname
= prepend_extension(
2000 self
.prepare_filename(new_info
),
2001 'f%s' % f
['format_id'], new_info
['ext'])
2002 if not ensure_dir_exists(fname
):
2004 downloaded
.append(fname
)
2005 partial_success
= dl(fname
, new_info
)
2006 success
= success
and partial_success
2007 info_dict
['__postprocessors'] = postprocessors
2008 info_dict
['__files_to_merge'] = downloaded
2010 # Just a single file
2011 success
= dl(filename
, info_dict
)
2012 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2013 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2015 except (OSError, IOError) as err
:
2016 raise UnavailableVideoError(err
)
2017 except (ContentTooShortError
, ) as err
:
2018 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2021 if success
and filename
!= '-':
2023 fixup_policy
= self
.params
.get('fixup')
2024 if fixup_policy
is None:
2025 fixup_policy
= 'detect_or_warn'
2027 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg or avconv to fix this automatically.'
2029 stretched_ratio
= info_dict
.get('stretched_ratio')
2030 if stretched_ratio
is not None and stretched_ratio
!= 1:
2031 if fixup_policy
== 'warn':
2032 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2033 info_dict
['id'], stretched_ratio
))
2034 elif fixup_policy
== 'detect_or_warn':
2035 stretched_pp
= FFmpegFixupStretchedPP(self
)
2036 if stretched_pp
.available
:
2037 info_dict
.setdefault('__postprocessors', [])
2038 info_dict
['__postprocessors'].append(stretched_pp
)
2040 self
.report_warning(
2041 '%s: Non-uniform pixel ratio (%s). %s'
2042 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2044 assert fixup_policy
in ('ignore', 'never')
2046 if (info_dict
.get('requested_formats') is None
2047 and info_dict
.get('container') == 'm4a_dash'):
2048 if fixup_policy
== 'warn':
2049 self
.report_warning(
2050 '%s: writing DASH m4a. '
2051 'Only some players support this container.'
2053 elif fixup_policy
== 'detect_or_warn':
2054 fixup_pp
= FFmpegFixupM4aPP(self
)
2055 if fixup_pp
.available
:
2056 info_dict
.setdefault('__postprocessors', [])
2057 info_dict
['__postprocessors'].append(fixup_pp
)
2059 self
.report_warning(
2060 '%s: writing DASH m4a. '
2061 'Only some players support this container. %s'
2062 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2064 assert fixup_policy
in ('ignore', 'never')
2066 if (info_dict
.get('protocol') == 'm3u8_native'
2067 or info_dict
.get('protocol') == 'm3u8'
2068 and self
.params
.get('hls_prefer_native')):
2069 if fixup_policy
== 'warn':
2070 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2072 elif fixup_policy
== 'detect_or_warn':
2073 fixup_pp
= FFmpegFixupM3u8PP(self
)
2074 if fixup_pp
.available
:
2075 info_dict
.setdefault('__postprocessors', [])
2076 info_dict
['__postprocessors'].append(fixup_pp
)
2078 self
.report_warning(
2079 '%s: malformed AAC bitstream detected. %s'
2080 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2082 assert fixup_policy
in ('ignore', 'never')
2085 self
.post_process(filename
, info_dict
)
2086 except (PostProcessingError
) as err
:
2087 self
.report_error('postprocessing: %s' % str(err
))
2089 self
.record_download_archive(info_dict
)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to a single
    fixed output file (no template placeholder in outtmpl).  Returns the
    accumulated download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # Several URLs with a literal (non-template) output name would all
    # overwrite the same file, unless at most one download is allowed.
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            # Re-raise so the caller stops processing further URLs.
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using the info JSON written by a previous --write-info-json run.

    Falls back to re-extracting from the recorded webpage_url when the
    stored info fails to download.  Returns the download return code.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(f)))
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a copy of info_dict without the transient 'requested_*' keys.

    Those keys hold non-serializable per-run state and must not end up in
    the written info JSON.  (Upstream declares this as a @staticmethod.)
    """
    return dict(
        (k, v) for k, v in info_dict.items()
        if k not in ['requested_formats', 'requested_subtitles'])
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors attached to the info dict ('__postprocessors') run
    before the globally registered ones (self._pps).  Files a PP marks
    for deletion are removed unless --keep-video was given.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        files_to_delete = []
        try:
            # Each PP may rewrite the info dict for the next one in the chain.
            files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if files_to_delete and not self.params.get('keepvideo', False):
            for old_filename in set(files_to_delete):
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                try:
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    # Best effort: a leftover original is not fatal.
                    self.report_warning('Unable to remove downloaded original file')
2161 def _make_archive_id(self
, info_dict
):
2162 video_id
= info_dict
.get('id')
2165 # Future-proof against any change in case
2166 # and backwards compatibility with prior versions
2167 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2168 if extractor
is None:
2169 url
= str_or_none(info_dict
.get('url'))
2172 # Try to find matching extractor for the URL and take its ie_key
2173 for ie
in self
._ies
:
2174 if ie
.suitable(url
):
2175 extractor
= ie
.ie_key()
2179 return extractor
.lower() + ' ' + video_id
def in_download_archive(self, info_dict):
    """Return True if the video is already recorded in the download archive."""
    fn = self.params.get('download_archive')
    if fn is None:
        # No archive configured: nothing can have been recorded.
        return False

    vid_id = self._make_archive_id(info_dict)
    if not vid_id:
        return False  # Incomplete video information

    return vid_id in self.archive
def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and in-memory set."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    # Callers only record videos that were successfully identified.
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Prefers an explicit 'resolution', then 'WxH', then 'Hp' / 'Wx?',
    falling back to *default*.  (Upstream declares this as a
    @staticmethod; the parameter name 'format' is kept for interface
    compatibility even though it shadows the builtin.)
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '%dx?' % format['width']
    else:
        res = default
    return res
2219 def _format_note(self
, fdict
):
2221 if fdict
.get('ext') in ['f4f', 'f4m']:
2222 res
+= '(unsupported) '
2223 if fdict
.get('language'):
2226 res
+= '[%s] ' % fdict
['language']
2227 if fdict
.get('format_note') is not None:
2228 res
+= fdict
['format_note'] + ' '
2229 if fdict
.get('tbr') is not None:
2230 res
+= '%4dk ' % fdict
['tbr']
2231 if fdict
.get('container') is not None:
2234 res
+= '%s container' % fdict
['container']
2235 if (fdict
.get('vcodec') is not None
2236 and fdict
.get('vcodec') != 'none'):
2239 res
+= fdict
['vcodec']
2240 if fdict
.get('vbr') is not None:
2242 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
2244 if fdict
.get('vbr') is not None:
2245 res
+= '%4dk' % fdict
['vbr']
2246 if fdict
.get('fps') is not None:
2249 res
+= '%sfps' % fdict
['fps']
2250 if fdict
.get('acodec') is not None:
2253 if fdict
['acodec'] == 'none':
2256 res
+= '%-5s' % fdict
['acodec']
2257 elif fdict
.get('abr') is not None:
2261 if fdict
.get('abr') is not None:
2262 res
+= '@%3dk' % fdict
['abr']
2263 if fdict
.get('asr') is not None:
2264 res
+= ' (%5dHz)' % fdict
['asr']
2265 if fdict
.get('filesize') is not None:
2268 res
+= format_bytes(fdict
['filesize'])
2269 elif fdict
.get('filesize_approx') is not None:
2272 res
+= '~' + format_bytes(fdict
['filesize_approx'])
def list_formats(self, info_dict):
    """Print the table of available formats for a video (--list-formats)."""
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        # Formats with very low preference are hidden from the listing.
        if f.get('preference') is None or f['preference'] >= -1000]
    if len(formats) > 1:
        # The sorting puts the best format last.
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print the table of available thumbnails (--list-thumbnails)."""
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print available subtitles/automatic captions per language.

    *subtitles* maps language code -> list of format dicts with 'ext'.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    self.to_screen(render_table(
        ['Language', 'formats'],
        # reversed() so the best format is listed last, matching format listing.
        [[lang, ', '.join(f['ext'] for f in reversed(formats))]
            for lang, formats in subtitles.items()]))
def urlopen(self, req):
    """ Start an HTTP download """
    # Accept either a plain URL string or a prepared Request object.
    if isinstance(req, compat_basestring):
        req = sanitized_Request(req)
    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write verbose-mode debug information (versions, encodings, proxies).

    No-op unless the 'verbose' option is set.  All output goes through
    self._write_string / write_string, not the normal screen logger.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled' + '\n')
    try:
        # Best effort: report the git revision when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), python_implementation(),
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        # Opt-in only: contacts yt-dl.org for IP and version check.
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build the urllib opener (cookies, proxies, custom handlers).

    Stores the result in self._opener and the socket timeout in
    self._socket_timeout; also initializes self.cookiejar.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        # In-memory only; nothing is persisted between runs.
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # Explicit empty proxy disables proxying entirely.
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode *s* to bytes using the configured output encoding.

    Bytes pass through unchanged.  On encoding failure the error is
    annotated with a hint and re-raised (the visible source dropped the
    trailing 'raise'; without it the failure would be silently swallowed).
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the output encoding: the 'encoding' option or the locale default."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
2463 def _write_thumbnails(self
, info_dict
, filename
):
2464 if self
.params
.get('writethumbnail', False):
2465 thumbnails
= info_dict
.get('thumbnails')
2467 thumbnails
= [thumbnails
[-1]]
2468 elif self
.params
.get('write_all_thumbnails', False):
2469 thumbnails
= info_dict
.get('thumbnails')
2474 # No thumbnails present, so return immediately
2477 for t
in thumbnails
:
2478 thumb_ext
= determine_ext(t
['url'], 'jpg')
2479 suffix
= '_%s' % t
['id'] if len(thumbnails
) > 1 else ''
2480 thumb_display_id
= '%s ' % t
['id'] if len(thumbnails
) > 1 else ''
2481 t
['filename'] = thumb_filename
= os
.path
.splitext(filename
)[0] + suffix
+ '.' + thumb_ext
2483 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(thumb_filename
)):
2484 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
2485 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2487 self
.to_screen('[%s] %s: Downloading thumbnail %s...' %
2488 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2490 uf
= self
.urlopen(t
['url'])
2491 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
2492 shutil
.copyfileobj(uf
, thumbf
)
2493 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2494 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
2495 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2496 self
.report_warning('Unable to download thumbnail "%s": %s' %
2497 (t
['url'], error_to_compat_str(err
)))