format_field,
formatSeconds,
GeoRestrictedError,
+ get_domain,
HEADRequest,
int_or_none,
iri_to_uri,
PostProcessingError,
preferredencoding,
prepend_extension,
+ ReExtractInfo,
register_socks_protocols,
RejectedVideoReached,
+ remove_terminal_sequences,
render_table,
replace_extension,
SameFileError,
strftime_or_none,
subtitles_filename,
supports_terminal_sequences,
- ThrottledDownload,
+ timetuple_from_msec,
to_high_limit_path,
traverse_obj,
try_get,
_PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
-from .version import __version__
+from .version import __version__, RELEASE_GIT_HEAD
if compat_os_name == 'nt':
import ctypes
file that is in the archive.
break_on_reject: Stop the download process when encountering a video that
has been filtered out.
+ break_per_url: Whether break_on_reject and break_on_existing
+ should act on each input URL as opposed to for the entire queue
cookiefile: File name where cookies should be read from and dumped to
cookiesfrombrowser: A tuple containing the name of the browser and the profile
name/path from where cookies are loaded.
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
debug_printtraffic:Print out sent and received HTTP traffic
- include_ads: Download ads as well
+ include_ads: Download ads as well (deprecated)
default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
extract_flat: Do not resolve URLs, return the immediate result.
Pass in 'in_playlist' to only show this behavior for
playlist items.
+ wait_for_video: If given, wait for scheduled streams to become available.
+ The value should be a tuple containing the range
+ (min_secs, max_secs) to wait between retries
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list.
for msg in self.params.get('_warnings', []):
self.report_warning(msg)
+ for msg in self.params.get('_deprecation_warnings', []):
+ self.deprecation_warning(msg)
if 'list-formats' in self.params.get('compat_opts', []):
self.params['listformats_table'] = False
self.print_debug_header()
self.add_default_info_extractors()
- for pp_def_raw in self.params.get('postprocessors', []):
- pp_def = dict(pp_def_raw)
- when = pp_def.pop('when', 'post_process')
- pp_class = get_postprocessor(pp_def.pop('key'))
- pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp, when=when)
-
hooks = {
'post_hooks': self.add_post_hook,
'progress_hooks': self.add_progress_hook,
for ph in self.params.get(opt, []):
fn(ph)
+ for pp_def_raw in self.params.get('postprocessors', []):
+ pp_def = dict(pp_def_raw)
+ when = pp_def.pop('when', 'post_process')
+ self.add_post_processor(
+ get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
+ when=when)
+
register_socks_protocols()
def preload_download_archive(fn):
def add_postprocessor_hook(self, ph):
"""Add the postprocessing progress hook"""
self._postprocessor_hooks.append(ph)
+ for pps in self._pps.values():
+ for pp in pps:
+ pp.add_progress_hook(ph)
def _bidi_workaround(self, message):
if not hasattr(self, '_output_channel'):
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
+ message = remove_terminal_sequences(message)
if compat_os_name == 'nt':
if ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
class Styles(Enum):
HEADERS = 'yellow'
- EMPHASIS = 'blue'
+ EMPHASIS = 'light blue'
ID = 'green'
DELIM = 'blue'
ERROR = 'red'
WARNING = 'yellow'
SUPPRESS = 'light black'
- def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
- assert out in ('screen', 'err')
+ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
if test_encoding:
original_text = text
- handle = self._screen_file if out == 'screen' else self._err_file
encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
text = text.encode(encoding, 'ignore').decode(encoding)
if fallback is not None and text != original_text:
text = fallback
if isinstance(f, self.Styles):
- f = f._value_
- return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
+ f = f.value
+ return format_text(text, f) if allow_colors else text if fallback is None else fallback
def _format_screen(self, *args, **kwargs):
- return self.__format_text('screen', *args, **kwargs)
+ return self._format_text(
+ self._screen_file, self._allow_colors['screen'], *args, **kwargs)
def _format_err(self, *args, **kwargs):
- return self.__format_text('err', *args, **kwargs)
+ return self._format_text(
+ self._err_file, self._allow_colors['err'], *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
return
self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
+ def deprecation_warning(self, message):
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning('DeprecationWarning: {message}')
+ else:
+ self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+
def report_error(self, message, tb=None):
'''
Do the same as trouble, but prefixes the message with 'ERROR:', colored
(?P<fields>{field})
(?P<maths>(?:{math_op}{math_field})*)
(?:>(?P<strf_format>.+?))?
- (?P<alternate>(?<!\\),[^|)]+)?
+ (?P<alternate>(?<!\\),[^|&)]+)?
+ (?:&(?P<replacement>.*?))?
(?:\|(?P<default>.*?))?
$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
key = outer_mobj.group('key')
mobj = re.match(INTERNAL_FORMAT_RE, key)
initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
- value, default = None, na
+ value, replacement, default = None, None, na
while mobj:
mobj = mobj.groupdict()
default = mobj['default'] if mobj['default'] is not None else default
value = get_value(mobj)
+ replacement = mobj['replacement']
if value is None and mobj['alternate']:
mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
else:
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
fmt = '0{:d}d'.format(field_size_compat_map[key])
- value = default if value is None else value
+ value = default if value is None else value if replacement is None else replacement
flags = outer_mobj.group('conversion') or ''
str_fmt = f'{fmt[:-1]}s'
# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)
if trim_file_name:
- fn_groups = filename.rsplit('.')
- ext = fn_groups[-1]
- sub_ext = ''
- if len(fn_groups) > 2:
- sub_ext = fn_groups[-2]
- filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
+ no_ext, *ext = filename.rsplit('.', 2)
+ filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
return filename
except ValueError as err:
temp_id = ie.get_temp_id(url)
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
- self.to_screen("[%s] %s: has already been recorded in archive" % (
- ie_key, temp_id))
+ self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+ if self.params.get('break_on_existing', False):
+ raise ExistingVideoReached()
break
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
else:
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
- except ThrottledDownload as e:
- self.to_stderr('\r')
- self.report_warning(f'{e}; Re-extracting data')
+ except ReExtractInfo as e:
+ if e.expected:
+ self.to_screen(f'{e}; Re-extracting data')
+ else:
+ self.to_stderr('\r')
+ self.report_warning(f'{e}; Re-extracting data')
return wrapper(self, *args, **kwargs)
except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
raise
raise
return wrapper
+ def _wait_for_video(self, ie_result):
+ if (not self.params.get('wait_for_video')
+ or ie_result.get('_type', 'video') != 'video'
+ or ie_result.get('formats') or ie_result.get('url')):
+ return
+
+ format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
+ last_msg = ''
+
+ def progress(msg):
+ nonlocal last_msg
+ self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
+ last_msg = msg
+
+ min_wait, max_wait = self.params.get('wait_for_video')
+ diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
+ if diff is None and ie_result.get('live_status') == 'is_upcoming':
+ diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
+ self.report_warning('Release time of video is not known')
+ elif (diff or 0) <= 0:
+ self.report_warning('Video should already be available according to extracted info')
+ diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
+ self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
+
+ wait_till = time.time() + diff
+ try:
+ while True:
+ diff = wait_till - time.time()
+ if diff <= 0:
+ progress('')
+ raise ReExtractInfo('[wait] Wait period ended', expected=True)
+ progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
+ time.sleep(1)
+ except KeyboardInterrupt:
+ progress('')
+ raise ReExtractInfo('[wait] Interrupted by user', expected=True)
+ except BaseException as e:
+ if not isinstance(e, ReExtractInfo):
+ self.to_screen('')
+ raise
+
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
ie_result.setdefault('original_url', extra_info['original_url'])
self.add_default_extra_info(ie_result, ie, url)
if process:
+ self._wait_for_video(ie_result)
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
'webpage_url': url,
'original_url': url,
'webpage_url_basename': url_basename(url),
+ 'webpage_url_domain': get_domain(url),
})
if ie is not None:
self.add_extra_info(ie_result, {
info_copy['id'] = ie.get_temp_id(ie_result['url'])
self.add_default_extra_info(info_copy, ie, ie_result['url'])
self.add_extra_info(info_copy, extra_info)
+ info_copy, _ = self.pre_process(info_copy)
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
if self.params.get('force_write_download_archive', False):
self.record_download_archive(info_copy)
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
})
return r
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
def _check_formats(self, formats):
for f in formats:
self.to_screen('[info] Testing format %s' % f['format_id'])
- temp_file = tempfile.NamedTemporaryFile(
- suffix='.tmp', delete=False,
- dir=self.get_output_path('temp') or None)
+ path = self.get_output_path('temp')
+ if not self._ensure_dir_exists(f'{path}/'):
+ continue
+ temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
if self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached()
+ if info_dict.get('is_live'):
+ info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+
# TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
downloader = downloader.__name__ if downloader else None
ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
- 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
+ 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
+ FFmpegFixupM3u8PP)
+ ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+ ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fixup()
try:
res = func(*args, **kwargs)
except UnavailableVideoError as e:
self.report_error(e)
- except DownloadCancelled as e:
+ except MaxDownloadsReached as e:
self.to_screen(f'[info] {e}')
raise
+ except DownloadCancelled as e:
+ self.to_screen(f'[info] {e}')
+ if not self.params.get('break_per_url'):
+ raise
else:
if self.params.get('dump_single_json', False):
self.post_extract(res)
info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
try:
self.__download_wrapper(self.process_ie_result)(info, download=True)
- except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
+ except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
if not isinstance(e, EntryNotInPlaylist):
self.to_stderr('\r')
webpage_url = info.get('webpage_url')
def _format_note(self, fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
- res += '(unsupported) '
+ res += '(unsupported)'
if fdict.get('language'):
if res:
res += ' '
- res += '[%s] ' % fdict['language']
+ res += '[%s]' % fdict['language']
if fdict.get('format_note') is not None:
- res += fdict['format_note'] + ' '
+ if res:
+ res += ' '
+ res += fdict['format_note']
if fdict.get('tbr') is not None:
- res += '%4dk ' % fdict['tbr']
+ if res:
+ res += ', '
+ res += '%4dk' % fdict['tbr']
if fdict.get('container') is not None:
if res:
res += ', '
def get_encoding(stream):
ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
if not supports_terminal_sequences(stream):
- ret += ' (No ANSI)'
+ from .compat import WINDOWS_VT_MODE
+ ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
return ret
encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
source = detect_variant()
- write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
+ write_debug(join_nonempty(
+ 'yt-dlp version', __version__,
+ f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
+ '' if source == 'unknown' else f'({source})',
+ delim=' '))
if not _LAZY_LOADER:
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
write_debug('Lazy loading extractors is forcibly disabled')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
if self.params.get('compat_opts'):
write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
- try:
- sp = Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate_or_kill()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_debug('Git HEAD: %s' % out)
- except Exception:
+
+ if source == 'source':
try:
- sys.exc_clear()
+ sp = Popen(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ cwd=os.path.dirname(os.path.abspath(__file__)))
+ out, err = sp.communicate_or_kill()
+ out = out.decode().strip()
+ if re.match('[0-9a-f]+', out):
+ write_debug('Git HEAD: %s' % out)
except Exception:
- pass
+ try:
+ sys.exc_clear()
+ except Exception:
+ pass
def python_implementation():
impl_name = platform.python_implementation()