PostProcessingError,
preferredencoding,
prepend_extension,
+ ReExtractInfo,
register_socks_protocols,
RejectedVideoReached,
render_table,
strftime_or_none,
subtitles_filename,
supports_terminal_sequences,
- ThrottledDownload,
+ timetuple_from_msec,
to_high_limit_path,
traverse_obj,
try_get,
_PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
-from .version import __version__
+from .version import __version__, RELEASE_GIT_HEAD
if compat_os_name == 'nt':
import ctypes
file that is in the archive.
break_on_reject: Stop the download process when encountering a video that
has been filtered out.
+ break_per_url: Whether break_on_reject and break_on_existing
+ should act on each input URL as opposed to for the entire queue
cookiefile: File name where cookies should be read from and dumped to
cookiesfrombrowser: A tuple containing the name of the browser and the profile
name/path from where cookies are loaded.
extract_flat: Do not resolve URLs, return the immediate result.
Pass in 'in_playlist' to only show this behavior for
playlist items.
+ wait_for_video: If given, wait for scheduled streams to become available.
+ The value should be a tuple containing the range
+ (min_secs, max_secs) to wait between retries
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list.
compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort
- no-clean-infojson, no-playlist-metafiles, no-keep-subs.
+ no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
Refer __init__.py for their implementation
progress_template: Dictionary of templates for progress outputs.
Allowed keys are 'download', 'postprocess',
self.cache = Cache(self)
windows_enable_vt_mode()
- # FIXME: This will break if we ever print color to stdout
self._allow_colors = {
'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
for msg in self.params.get('_warnings', []):
self.report_warning(msg)
+ for msg in self.params.get('_deprecation_warnings', []):
+ self.deprecation_warning(msg)
if 'list-formats' in self.params.get('compat_opts', []):
self.params['listformats_table'] = False
pp = pp_class(self, **compat_kwargs(pp_def))
self.add_post_processor(pp, when=when)
- for ph in self.params.get('post_hooks', []):
- self.add_post_hook(ph)
-
- for ph in self.params.get('progress_hooks', []):
- self.add_progress_hook(ph)
+ hooks = {
+ 'post_hooks': self.add_post_hook,
+ 'progress_hooks': self.add_progress_hook,
+ 'postprocessor_hooks': self.add_postprocessor_hook,
+ }
+ for opt, fn in hooks.items():
+ for ph in self.params.get(opt, []):
+ fn(ph)
register_socks_protocols()
class Styles(Enum):
HEADERS = 'yellow'
- EMPHASIS = 'blue'
+ EMPHASIS = 'light blue'
ID = 'green'
DELIM = 'blue'
ERROR = 'red'
WARNING = 'yellow'
+ SUPPRESS = 'light black'
- def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
- assert out in ('screen', 'err')
+ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
if test_encoding:
original_text = text
- handle = self._screen_file if out == 'screen' else self._err_file
encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
text = text.encode(encoding, 'ignore').decode(encoding)
if fallback is not None and text != original_text:
text = fallback
if isinstance(f, self.Styles):
- f = f._value_
- return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
+ f = f.value
+ return format_text(text, f) if allow_colors else text if fallback is None else fallback
def _format_screen(self, *args, **kwargs):
- return self.__format_text('screen', *args, **kwargs)
+ return self._format_text(
+ self._screen_file, self._allow_colors['screen'], *args, **kwargs)
def _format_err(self, *args, **kwargs):
- return self.__format_text('err', *args, **kwargs)
+ return self._format_text(
+ self._err_file, self._allow_colors['err'], *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
return
self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
+ def deprecation_warning(self, message):
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning('DeprecationWarning: {message}')
+ else:
+ self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+
def report_error(self, message, tb=None):
'''
Do the same as trouble, but prefixes the message with 'ERROR:', colored
# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)
if trim_file_name:
- fn_groups = filename.rsplit('.')
- ext = fn_groups[-1]
- sub_ext = ''
- if len(fn_groups) > 2:
- sub_ext = fn_groups[-2]
- filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
+ no_ext, *ext = filename.rsplit('.', 2)
+ filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
return filename
except ValueError as err:
temp_id = ie.get_temp_id(url)
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
- self.to_screen("[%s] %s: has already been recorded in archive" % (
- ie_key, temp_id))
+ self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+ if self.params.get('break_on_existing', False):
+ raise ExistingVideoReached()
break
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
else:
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
- except ThrottledDownload as e:
- self.to_stderr('\r')
- self.report_warning(f'{e}; Re-extracting data')
+ except ReExtractInfo as e:
+ if e.expected:
+ self.to_screen(f'{e}; Re-extracting data')
+ else:
+ self.to_stderr('\r')
+ self.report_warning(f'{e}; Re-extracting data')
return wrapper(self, *args, **kwargs)
- except (DownloadCancelled, LazyList.IndexError):
+ except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
raise
except Exception as e:
if self.params.get('ignoreerrors'):
raise
return wrapper
+ def _wait_for_video(self, ie_result):
+ if (not self.params.get('wait_for_video')
+ or ie_result.get('_type', 'video') != 'video'
+ or ie_result.get('formats') or ie_result.get('url')):
+ return
+
+ format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
+ last_msg = ''
+
+ def progress(msg):
+ nonlocal last_msg
+ self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
+ last_msg = msg
+
+ min_wait, max_wait = self.params.get('wait_for_video')
+ diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
+ if diff is None and ie_result.get('live_status') == 'is_upcoming':
+ diff = random.randrange(min_wait or 0, max_wait) if max_wait else min_wait
+ self.report_warning('Release time of video is not known')
+ elif (diff or 0) <= 0:
+ self.report_warning('Video should already be available according to extracted info')
+ diff = min(max(diff, min_wait or 0), max_wait or float('inf'))
+ self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
+
+ wait_till = time.time() + diff
+ try:
+ while True:
+ diff = wait_till - time.time()
+ if diff <= 0:
+ progress('')
+ raise ReExtractInfo('[wait] Wait period ended', expected=True)
+ progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
+ time.sleep(1)
+ except KeyboardInterrupt:
+ progress('')
+ raise ReExtractInfo('[wait] Interrupted by user', expected=True)
+ except BaseException as e:
+ if not isinstance(e, ReExtractInfo):
+ self.to_screen('')
+ raise
+
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
ie_result.setdefault('original_url', extra_info['original_url'])
self.add_default_extra_info(ie_result, ie, url)
if process:
+ self._wait_for_video(ie_result)
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
if 'entries' not in ie_result:
raise EntryNotInPlaylist('There are no entries')
+
+ MissingEntry = object()
incomplete_entries = bool(ie_result.get('requested_entries'))
if incomplete_entries:
- def fill_missing_entries(entries, indexes):
- ret = [None] * max(*indexes)
- for i, entry in zip(indexes, entries):
+ def fill_missing_entries(entries, indices):
+ ret = [MissingEntry] * max(indices)
+ for i, entry in zip(indices, entries):
ret[i - 1] = entry
return ret
ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
entry = None
try:
entry = get_entry(i)
- if entry is None:
+ if entry is MissingEntry:
raise EntryNotInPlaylist()
except (IndexError, EntryNotInPlaylist):
if incomplete_entries:
if entry is not None]
n_entries = len(entries)
- if not playlistitems and (playliststart or playlistend):
+ if not playlistitems and (playliststart != 1 or playlistend):
playlistitems = list(range(playliststart, playliststart + n_entries))
ie_result['requested_entries'] = playlistitems
+ _infojson_written = False
if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
ie_copy = {
'playlist': playlist,
}
ie_copy.update(dict(ie_result))
- if self._write_info_json('playlist', ie_result,
- self.prepare_filename(ie_copy, 'pl_infojson')) is None:
+ _infojson_written = self._write_info_json(
+ 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
+ if _infojson_written is None:
return
if self._write_description('playlist', ie_result,
self.prepare_filename(ie_copy, 'pl_description')) is None:
self.report_error(
'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
break
- # TODO: skip failed (empty) entries?
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
+
+ # Write the updated info to json
+ if _infojson_written and self._write_info_json(
+ 'updated playlist', ie_result,
+ self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
+ return
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
return ie_result
def _check_formats(self, formats):
for f in formats:
self.to_screen('[info] Testing format %s' % f['format_id'])
- temp_file = tempfile.NamedTemporaryFile(
- suffix='.tmp', delete=False,
- dir=self.get_output_path('temp') or None)
+ path = self.get_output_path('temp')
+ if not self._ensure_dir_exists(f'{path}/'):
+ continue
+ temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
if format_spec == 'all':
def selector_function(ctx):
- yield from _check_formats(ctx['formats'])
+ yield from _check_formats(ctx['formats'][::-1])
elif format_spec == 'mergeall':
def selector_function(ctx):
formats = list(_check_formats(ctx['formats']))
t['url'] = sanitize_url(t['url'])
if self.params.get('check_formats') is True:
- info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
+ info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
else:
info_dict['thumbnails'] = thumbnails
# TODO Central sorting goes here
if self.params.get('check_formats') is True:
- formats = LazyList(self._check_formats(formats[::-1])).reverse()
+ formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
if not formats or formats[0] is not info_dict:
# only set the 'formats' fields if the original info_dict list them
infofn = self.prepare_filename(info_dict, 'infojson')
_infojson_written = self._write_info_json('video', info_dict, infofn)
if _infojson_written:
+ info_dict['infojson_filename'] = infofn
+ # For backward compatability, even though it was a private field
info_dict['__infojson_filename'] = infofn
elif _infojson_written is None:
return
self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
return False
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
+ if not self._ensure_dir_exists(encodeFilename(linkfn)):
+ return False
if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
return True
downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
downloader = downloader.__name__ if downloader else None
ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
- 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
+ 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
+ FFmpegFixupM3u8PP)
+ ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+ ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fixup()
try:
res = func(*args, **kwargs)
except UnavailableVideoError as e:
self.report_error(e)
- except DownloadCancelled as e:
+ except MaxDownloadsReached as e:
self.to_screen(f'[info] {e}')
raise
+ except DownloadCancelled as e:
+ self.to_screen(f'[info] {e}')
+ if not self.params.get('break_per_url'):
+ raise
else:
if self.params.get('dump_single_json', False):
self.post_extract(res)
info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
try:
self.__download_wrapper(self.process_ie_result)(info, download=True)
- except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
- self.to_stderr('\r')
+ except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
+ if not isinstance(e, EntryNotInPlaylist):
+ self.to_stderr('\r')
webpage_url = info.get('webpage_url')
if webpage_url is not None:
self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
keep_keys = ['_type'] # Always keep this to facilitate load-info-json
if remove_private_keys:
remove_keys |= {
- 'requested_formats', 'requested_subtitles', 'requested_entries',
- 'filepath', 'entries', 'original_url', 'playlist_autonumber',
+ 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
+ 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
}
empty_values = (None, {}, [], set(), tuple())
reject = lambda k, v: k not in keep_keys and (
@staticmethod
def format_resolution(format, default='unknown'):
- is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
return 'audio only'
if format.get('resolution') is not None:
return format['resolution']
if format.get('width') and format.get('height'):
- res = '%dx%d' % (format['width'], format['height'])
+ return '%dx%d' % (format['width'], format['height'])
elif format.get('height'):
- res = '%sp' % format['height']
+ return '%sp' % format['height']
elif format.get('width'):
- res = '%dx?' % format['width']
- elif is_images:
- return 'images'
- else:
- return default
- return f'{res} images' if is_images else res
+ return '%dx?' % format['width']
+ return default
def _format_note(self, fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
- res += '(unsupported) '
+ res += '(unsupported)'
if fdict.get('language'):
if res:
res += ' '
- res += '[%s] ' % fdict['language']
+ res += '[%s]' % fdict['language']
if fdict.get('format_note') is not None:
- res += fdict['format_note'] + ' '
+ if res:
+ res += ' '
+ res += fdict['format_note']
if fdict.get('tbr') is not None:
- res += '%4dk ' % fdict['tbr']
+ if res:
+ res += ', '
+ res += '%4dk' % fdict['tbr']
if fdict.get('container') is not None:
if res:
res += ', '
formats = info_dict.get('formats', [info_dict])
new_format = self.params.get('listformats_table', True) is not False
if new_format:
- tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
- vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
- abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [
[
self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'),
- self.format_resolution(f),
- format_field(f, 'fps', '%3d'),
+ format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
+ format_field(f, 'fps', '\t%d'),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
delim,
- format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
- format_field(f, 'tbr', f'%{tbr_digits}dk'),
- shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
+ format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
+ format_field(f, 'tbr', '\t%dk'),
+ shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
delim,
- format_field(f, 'vcodec', default='unknown').replace('none', ''),
- format_field(f, 'vbr', f'%{vbr_digits}dk'),
- format_field(f, 'acodec', default='unknown').replace('none', ''),
- format_field(f, 'abr', f'%{abr_digits}dk'),
- format_field(f, 'asr', '%5dHz'),
+ format_field(f, 'vcodec', default='unknown').replace(
+ 'none',
+ 'images' if f.get('acodec') == 'none'
+ else self._format_screen('audio only', self.Styles.SUPPRESS)),
+ format_field(f, 'vbr', '\t%dk'),
+ format_field(f, 'acodec', default='unknown').replace(
+ 'none',
+ '' if f.get('vcodec') == 'none'
+ else self._format_screen('video only', self.Styles.SUPPRESS)),
+ format_field(f, 'abr', '\t%dk'),
+ format_field(f, 'asr', '\t%dHz'),
join_nonempty(
self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
format_field(f, 'language', '[%s]'),
- format_field(f, 'format_note'),
- format_field(f, 'container', ignore=(None, f.get('ext'))),
- delim=', '),
+ join_nonempty(
+ format_field(f, 'format_note'),
+ format_field(f, 'container', ignore=(None, f.get('ext'))),
+ delim=', '),
+ delim=' '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers(
- 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO',
- delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
+ 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
+ delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
else:
table = [
[
'[info] Available formats for %s:' % info_dict['id'])
self.to_stdout(render_table(
header_line, table,
- extraGap=(0 if new_format else 1),
- hideEmpty=new_format,
+ extra_gap=(0 if new_format else 1),
+ hide_empty=new_format,
delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
def list_thumbnails(self, info_dict):
self.to_stdout(render_table(
self._list_format_headers('Language', 'Name', 'Formats'),
[_row(lang, formats) for lang, formats in subtitles.items()],
- hideEmpty=True))
+ hide_empty=True))
def urlopen(self, req):
""" Start an HTTP download """
write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
source = detect_variant()
- write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
+ write_debug(join_nonempty(
+ 'yt-dlp version', __version__,
+ f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
+ '' if source == 'unknown' else f'({source})',
+ delim=' '))
if not _LAZY_LOADER:
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
write_debug('Lazy loading extractors is forcibly disabled')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
if self.params.get('compat_opts'):
write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
- try:
- sp = Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate_or_kill()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_debug('Git HEAD: %s' % out)
- except Exception:
+
+ if source == 'source':
try:
- sys.exc_clear()
+ sp = Popen(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ cwd=os.path.dirname(os.path.abspath(__file__)))
+ out, err = sp.communicate_or_kill()
+ out = out.decode().strip()
+ if re.match('[0-9a-f]+', out):
+ write_debug('Git HEAD: %s' % out)
except Exception:
- pass
+ try:
+ sys.exc_clear()
+ except Exception:
+ pass
def python_implementation():
impl_name = platform.python_implementation()
encoding = preferredencoding()
return encoding
- def _write_info_json(self, label, ie_result, infofn):
+ def _write_info_json(self, label, ie_result, infofn, overwrite=None):
''' Write infojson and returns True = written, False = skip, None = error '''
+ if overwrite is None:
+ overwrite = self.params.get('overwrites', True)
if not self.params.get('writeinfojson'):
return False
elif not infofn:
return False
elif not self._ensure_dir_exists(infofn):
return None
- elif not self.params.get('overwrites', True) and os.path.exists(infofn):
+ elif not overwrite and os.path.exists(infofn):
self.to_screen(f'[info] {label.title()} metadata is already present')
else:
self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')