DEFAULT_OUTTMPL,
determine_ext,
determine_protocol,
+ DOT_DESKTOP_LINK_TEMPLATE,
+ DOT_URL_LINK_TEMPLATE,
+ DOT_WEBLOC_LINK_TEMPLATE,
DownloadError,
encode_compat_str,
encodeFilename,
formatSeconds,
GeoRestrictedError,
int_or_none,
+ iri_to_uri,
ISO3166Utils,
locked_file,
make_HTTPS_handler,
std_headers,
str_or_none,
subtitles_filename,
+ to_high_limit_path,
UnavailableVideoError,
url_basename,
version_tuple,
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
simulate: Do not download the video files.
- format: Video format code. See options.py for more information.
+ format: Video format code. See "FORMAT SELECTION" for more details.
+ format_sort: How to sort the video formats. See "Sorting Formats" for more details.
+ format_sort_force: Force the given format_sort. See "Sorting Formats" for more details.
+ allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
+ allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names.
trim_file_name: Limit length of filename (extension excluded).
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
+ writelink: Write an internet shortcut file, depending on the
+ current platform (.url/.webloc/.desktop)
+ writeurllink: Write a Windows internet shortcut file (.url)
+ writewebloclink: Write a macOS internet shortcut file (.webloc)
+ writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
+ break_on_existing: Stop the download process after attempting to download a file that's
+ in the archive.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
for key, value in extra_info.items():
info_dict.setdefault(key, value)
- def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
process=True, force_generic_extractor=False):
'''
Returns a list with a dictionary for each video we find.
if not ie.suitable(url):
continue
- ie = self.get_info_extractor(ie.ie_key())
+ ie_key = ie.ie_key()
+ ie = self.get_info_extractor(ie_key)
if not ie.working():
self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.')
try:
- ie_result = ie.extract(url)
- if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
- break
- if isinstance(ie_result, list):
- # Backwards compatibility: old IE result format
- ie_result = {
- '_type': 'compat_list',
- 'entries': ie_result,
- }
- self.add_default_extra_info(ie_result, ie, url)
- if process:
- return self.process_ie_result(ie_result, download, extra_info)
- else:
- return ie_result
+ temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
+ except (AssertionError, IndexError, AttributeError):
+ temp_id = None
+ if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
+ self.to_screen("[%s] %s: has already been recorded in archive" % (
+ ie_key, temp_id))
+ break
+
+ return self.__extract_info(url, ie, download, extra_info, process, info_dict)
+
+ else:
+ self.report_error('no suitable InfoExtractor for URL %s' % url)
+
# Decorator for methods that run extraction work: `wrapper` calls
# func(self, *args, **kwargs) and returns its result when no exception is raised.
# Visible handling of exceptions raised by func:
#   - GeoRestrictedError: builds a message from e.msg (plus the VPN/proxy hint)
#     and passes it to self.report_error.
#   - ExtractorError: passed to self.report_error together with its traceback.
#   - MaxDownloadsReached: re-raised unchanged.
#   - any other Exception: reported via self.report_error only when the
#     'ignoreerrors' param is truthy; otherwise re-raised.
# The old in-loop handlers ended with `break` (the removed lines below); in the
# wrapper a handled error simply falls through.
# NOTE(review): part of the GeoRestrictedError branch is elided from this excerpt
# (between `if e.countries:` and the `map(...)` line) — confirm against the full file.
+ def __handle_extraction_exceptions(func):
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
- break
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
- break
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
- break
else:
raise
+ return wrapper
+
# Runs a single extractor on `url` and optionally post-processes the result.
#   url        - URL handed to ie.extract().
#   ie         - extractor instance to run.
#   download, extra_info - forwarded to self.process_ie_result when `process` is truthy.
#   process    - when falsy, the extractor result is returned as-is (after the
#                adjustments below) instead of being processed.
#   info_dict  - optional dict; its truthy 'id' / 'title' values overwrite the
#                same keys of the extractor result.
# Returns None when ie.extract() returns None. A list result is wrapped into a
# {'_type': 'compat_list', 'entries': ...} dict before further handling.
# The trailing `else:` is a context line shared with the old code: the removed
# report_error line under it belonged to the previous for/else in extract_info,
# the added `return ie_result` is the new non-processing branch.
+ @__handle_extraction_exceptions
+ def __extract_info(self, url, ie, download, extra_info, process, info_dict):
+ ie_result = ie.extract(url)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ return
+ if isinstance(ie_result, list):
+ # Backwards compatibility: old IE result format
+ ie_result = {
+ '_type': 'compat_list',
+ 'entries': ie_result,
+ }
+ if info_dict:
+ if info_dict.get('id'):
+ ie_result['id'] = info_dict['id']
+ if info_dict.get('title'):
+ ie_result['title'] = info_dict['title']
+ self.add_default_extra_info(ie_result, ie, url)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
else:
- self.report_error('no suitable InfoExtractor for URL %s' % url)
+ return ie_result
def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, {
# We have to add extra_info to the results because it may be
# contained in a playlist
return self.extract_info(ie_result['url'],
- download,
+ download, info_dict=ie_result,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'url_transparent':
reason = self._match_entry(entry, incomplete=True)
if reason is not None:
- self.to_screen('[download] ' + reason)
- continue
+ if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
+     # Stop at the first archived entry. The notice goes through
+     # self.to_screen, like the skip message in the else branch, rather
+     # than a bare print() that bypasses this object's output handling.
+     self.to_screen('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
+     break
+ else:
+     self.to_screen('[download] ' + reason)
+     continue
- entry_result = self.process_ie_result(entry,
- download=download,
- extra_info=extra)
+ entry_result = self.__process_iterable_entry(entry, download, extra)
+ # TODO: skip failed (empty) entries?
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
else:
raise Exception('Invalid result type: %s' % result_type)
# Processes one playlist entry by delegating to self.process_ie_result with the
# given `download` flag and `extra_info`, and returns whatever that call returns.
# It is a separate method so that __handle_extraction_exceptions can wrap the
# per-entry call; exceptions raised while processing the entry go through that
# decorator's handling.
+ @__handle_extraction_exceptions
+ def __process_iterable_entry(self, entry, download, extra_info):
+ return self.process_ie_result(
+ entry, download=download, extra_info=extra_info)
+
def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec "
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
+ \s*(?P<key>[a-zA-Z0-9._-]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
GROUP = 'GROUP'
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+ allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
+ 'video': self.params.get('allow_multiple_video_streams', True)}
+
def _parse_filter(tokens):
filter_parts = []
for type, string, start, _, _ in tokens:
return selectors
def _build_selector_function(selector):
- if isinstance(selector, list):
+ if isinstance(selector, list): # ,
fs = [_build_selector_function(s) for s in selector]
def selector_function(ctx):
for format in f(ctx):
yield format
return selector_function
- elif selector.type == GROUP:
+
+ elif selector.type == GROUP: # ()
selector_function = _build_selector_function(selector.selector)
- elif selector.type == PICKFIRST:
+
+ elif selector.type == PICKFIRST: # /
fs = [_build_selector_function(s) for s in selector.selector]
def selector_function(ctx):
if picked_formats:
return picked_formats
return []
- elif selector.type == SINGLE:
- format_spec = selector.selector
- def selector_function(ctx):
- formats = list(ctx['formats'])
- if not formats:
- return
- if format_spec == 'all':
- for f in formats:
- yield f
- elif format_spec in ['best', 'worst', None]:
- format_idx = 0 if format_spec == 'worst' else -1
- audiovideo_formats = [
- f for f in formats
- if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
- if audiovideo_formats:
- yield audiovideo_formats[format_idx]
- # for extractors with incomplete formats (audio only (soundcloud)
- # or video only (imgur)) we will fallback to best/worst
- # {video,audio}-only format
- elif ctx['incomplete_formats']:
- yield formats[format_idx]
- elif format_spec == 'bestaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[-1]
- elif format_spec == 'worstaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[0]
- elif format_spec == 'bestvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[-1]
- elif format_spec == 'worstvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[0]
+ elif selector.type == SINGLE: # atom
+ format_spec = selector.selector if selector.selector is not None else 'best'
+
+ if format_spec == 'all':
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if formats:
+ for f in formats:
+ yield f
+
+ else:
+ format_fallback = False
+ format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
+ if format_spec_obj is not None:
+ format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
+ format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
+ not_format_type = 'v' if format_type == 'a' else 'a'
+ format_modified = format_spec_obj.group(3) is not None
+
+ format_fallback = not format_type and not format_modified # for b, w
+ filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
+ if format_type and format_modified # bv*, ba*, wv*, wa*
+ else (lambda f: f.get(not_format_type + 'codec') == 'none')
+ if format_type # bv, ba, wv, wa
+ else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+ if not format_modified # b, w
+ else None) # b*, w*
else:
- extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
- if format_spec in extensions:
- filter_f = lambda f: f['ext'] == format_spec
- else:
- filter_f = lambda f: f['format_id'] == format_spec
- matches = list(filter(filter_f, formats))
+ format_idx = -1
+ filter_f = ((lambda f: f.get('ext') == format_spec)
+ if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
+ else (lambda f: f.get('format_id') == format_spec)) # id
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if not formats:
+ return
+ matches = list(filter(filter_f, formats)) if filter_f is not None else formats
if matches:
- yield matches[-1]
- elif selector.type == MERGE:
+ yield matches[format_idx]
+ elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) best/worst will fallback to
+ # best/worst {video,audio}-only format
+ yield formats[format_idx]
+
+ elif selector.type == MERGE: # +
def _merge(formats_pair):
format_1, format_2 = formats_pair
formats_info.extend(format_1.get('requested_formats', (format_1,)))
formats_info.extend(format_2.get('requested_formats', (format_2,)))
+ if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+     # Keep only the first stream of every kind that must not be duplicated.
+     # Survivors are collected into a new list instead of popping from
+     # formats_info while enumerating it: popping in place skips the element
+     # that follows each removal, and could pop the same index twice
+     # (IndexError) for a format that carries both a video and an audio
+     # codec when both kinds are restricted.
+     get_no_more = {"video": False, "audio": False}
+     kept_formats = []
+     for fmt_info in formats_info:
+         # Restricted kinds this format contributes a stream to.
+         restricted = [
+             aud_vid for aud_vid in ["audio", "video"]
+             if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none']
+         if any(get_no_more[aud_vid] for aud_vid in restricted):
+             # A stream of a restricted kind was already accepted: drop it.
+             continue
+         for aud_vid in restricted:
+             get_no_more[aud_vid] = True
+         kept_formats.append(fmt_info)
+     # Update in place so the code below keeps using the same list object.
+     formats_info[:] = kept_formats
+
+ if len(formats_info) == 1:
+ return formats_info[0]
+
video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
expected=True)
if download:
+ self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
if len(formats_to_download) > 1:
self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
for format in formats_to_download:
self.report_error('Cannot write annotations file: ' + annofn)
return
# Local download helper: instantiates the downloader class returned by
# get_suitable_downloader(info, self.params), registers every hook from
# self._progress_hooks on it, and returns the result of fd.download().
#   name     - first argument passed to fd.download.
#   info     - dict describing what to fetch; info.get('url') is printed in verbose mode.
#   subtitle - new flag (default False) forwarded as the third positional
#              argument to fd.download.
- def dl(name, info):
+ def dl(name, info, subtitle=False):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
+ return fd.download(name, info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
- ie = self.get_info_extractor(info_dict['extractor_key'])
+ # ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
return
else:
try:
+ dl(sub_filename, sub_info, subtitle=True)
+ '''
if self.params.get('sleep_interval_subtitles', False):
dl(sub_filename, sub_info)
else:
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
+ '''
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
self._write_thumbnails(info_dict, filename)
+ # Write internet shortcut files
+ url_link = webloc_link = desktop_link = False
+ if self.params.get('writelink', False):
+ if sys.platform == "darwin": # macOS.
+ webloc_link = True
+ elif sys.platform.startswith("linux"):
+ desktop_link = True
+ else: # if sys.platform in ['win32', 'cygwin']:
+ url_link = True
+ if self.params.get('writeurllink', False):
+ url_link = True
+ if self.params.get('writewebloclink', False):
+ webloc_link = True
+ if self.params.get('writedesktoplink', False):
+ desktop_link = True
+
+ if url_link or webloc_link or desktop_link:
+ if 'webpage_url' not in info_dict:
+ self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
+ return
+ ascii_url = iri_to_uri(info_dict['webpage_url'])
+
# Writes one internet-shortcut file next to the media file.
#   extension      - extension of the shortcut file ('url', 'webloc', 'desktop' at the call sites).
#   template       - %-format template filled with {'url': ascii_url} (plus 'filename').
#   newline        - newline argument passed to io.open for the written file.
#   embed_filename - when truthy, also supplies 'filename': linkfn with the
#                    trailing '.<extension>' sliced off.
# The target name is `filename` with its extension replaced via replace_extension.
# If the 'nooverwrites' param is set and the target exists, only an info message
# is printed. Otherwise the file is written as UTF-8; an OSError/IOError is
# reported through self.report_error and the helper returns False.
# NOTE(review): indentation is not visible in this excerpt; `return True` appears
# to be the function-level fall-through (success or already-present) — confirm.
+ def _write_link_file(extension, template, newline, embed_filename):
+ linkfn = replace_extension(filename, extension, info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
+ self.to_screen('[info] Internet shortcut is already present')
+ else:
+ try:
+ self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
+ with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
+ template_vars = {'url': ascii_url}
+ if embed_filename:
+ template_vars['filename'] = linkfn[:-(len(extension) + 1)]
+ linkfile.write(template % template_vars)
+ except (OSError, IOError):
+ self.report_error('Cannot write internet shortcut ' + linkfn)
+ return False
+ return True
+
+ if url_link:
+ if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
+ return
+ if webloc_link:
+ if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
+ return
+ if desktop_link:
+ if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
+ return
+
+ # Download
+ must_record_download_archive = False
if not self.params.get('skip_download', False):
try:
if info_dict.get('requested_formats') is not None:
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
- if len(formats) > 1:
- table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
+ # if len(formats) > 1:
+ # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best*)'
header_line = ['format code', 'extension', 'resolution', 'note']
self.to_screen(