1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma-separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
-1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
+1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
To summarize, the general syntax for a field is:
```
# NB: Keep in sync with the docstring of extractor/common.py
'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
- 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+ 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
'preference', 'language', 'language_preference', 'quality', 'source_preference',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
format_field(f, 'ext'),
self.format_resolution(f),
self._format_note(f)
- ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ ] for f in formats if (f.get('preference') or 0) >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
def simplified_codec(f, field):
def main(argv=None):
+ global _IN_CLI
+ _IN_CLI = True
try:
_exit(*variadic(_real_main(argv)))
except DownloadError:
import yt_dlp
if __name__ == '__main__':
- yt_dlp._IN_CLI = True
yt_dlp.main()
return tmpl
return default
- _formats_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
+ _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
if s['status'] == 'finished':
if self.params.get('noprogress'):
s.update({
'speed': speed,
'_speed_str': self.format_speed(speed).strip(),
- '_total_bytes_str': _formats_bytes('total_bytes'),
+ '_total_bytes_str': _format_bytes('total_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
'_percent_str': self.format_percent(100),
})
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
lambda: s['downloaded_bytes'] == 0 and 0)),
- '_total_bytes_str': _formats_bytes('total_bytes'),
- '_total_bytes_estimate_str': _formats_bytes('total_bytes_estimate'),
- '_downloaded_bytes_str': _formats_bytes('downloaded_bytes'),
+ '_total_bytes_str': _format_bytes('total_bytes'),
+ '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
+ '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
'_elapsed_str': self.format_seconds(s.get('elapsed')),
})
return self._downloader.params.get(name, default, *args, **kwargs)
return default
- def report_drm(self, video_id, partial=False):
+ def report_drm(self, video_id, partial=NO_DEFAULT):
+ if partial is not NO_DEFAULT:
+ self._downloader.deprecation_warning('InfoExtractor.report_drm no longer accepts the argument partial')
self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
def report_extraction(self, id_or_name):
unified_timestamp,
unsmuggle_url,
url_or_none,
+ variadic,
xpath_attr,
xpath_text,
xpath_with_ns,
webpage)
if mobj is not None:
varname = mobj.group(1)
- sources = self._parse_json(
- mobj.group(2), video_id, transform_source=js_to_json,
- fatal=False) or []
- if not isinstance(sources, list):
- sources = [sources]
+ sources = variadic(self._parse_json(
+ mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
formats = []
subtitles = {}
for source in sources:
'tags': ['prank call', 'prank'],
'upload_date': '20220825'
}
+ }, {
+ 'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL',
+ 'info_dict': {
+ 'id': '2048',
+ 'ext': 'mp3',
+ 'title': 'NOT COOL',
+ 'display_id': 'NOT-COOL',
+ 'timestamp': 1665028364,
+ 'uploader': 'phonelosers',
+ 'channel_id': 6,
+ 'duration': 4044,
+ 'cast': ['phonelosers'],
+ 'description': '',
+ 'categories': ['prank'],
+ 'tags': ['prank call', 'prank'],
+ 'upload_date': '20221006'
+ }
}]
def _real_extract(self, url):
import re
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- js_to_json,
- mimetype2ext,
- traverse_obj,
-)
+from ..utils import determine_ext, js_to_json, mimetype2ext, traverse_obj
class TV24UAVideoIE(InfoExtractor):
'live_status': 'not_live',
'playable_in_embed': True,
'comment_count': int,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'skip_download': True,
'live_status': 'not_live',
'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
'comment_count': int,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'skip_download': True,
'duration': 522,
'channel': 'kudvenkat',
'comment_count': int,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'skip_download': True,
'like_count': int,
'live_status': 'not_live',
'playable_in_embed': True,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'format': '17', # 3gp format available on android
'duration': 248,
'categories': ['Education'],
'age_limit': 0,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'chapters': list,
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
sponsor_chapters = [to_chapter(s) for s in duration_match]
if not sponsor_chapters:
- self.to_screen('No segments were found in the SponsorBlock database')
+ self.to_screen('No matching segments were found in the SponsorBlock database')
else:
self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
return sponsor_chapters
return self.parser.parse_args(self.all_args)
-class WebSocketsWrapper():
+class WebSocketsWrapper:
"""Wraps websockets module to use in non-async scopes"""
pool = None
def wrapper(self, *args, **kwargs):
bound_args = signature.bind(self, *args, **kwargs)
bound_args.apply_defaults()
- key = tuple(bound_args.arguments.values())
+ key = tuple(bound_args.arguments.values())[1:]
- if not hasattr(self, '__cached_method__cache'):
- self.__cached_method__cache = {}
- cache = self.__cached_method__cache.setdefault(f.__name__, {})
+ cache = vars(self).setdefault('__cached_method__cache', {}).setdefault(f.__name__, {})
if key not in cache:
cache[key] = f(self, *args, **kwargs)
return cache[key]