compat_urlparse,
compat_xml_parse_error,
)
+from ..downloader import FileDownloader
from ..downloader.f4m import (
get_base_url,
remove_encrypted_media,
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)
- def _sort_formats(self, formats, field_preference=None):
- if not formats:
- raise ExtractorError('No video formats found')
-
- for f in formats:
- # Automatically determine tbr when missing based on abr and vbr (improves
- # formats sorting in some cases)
- if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
- f['tbr'] = f['abr'] + f['vbr']
-
- def _formats_key(f):
- # TODO remove the following workaround
- from ..utils import determine_ext
- if not f.get('ext') and 'url' in f:
- f['ext'] = determine_ext(f['url'])
-
- if isinstance(field_preference, (list, tuple)):
- return tuple(
- f.get(field)
- if f.get(field) is not None
- else ('' if field == 'format_id' else -1)
- for field in field_preference)
-
- preference = f.get('preference')
- if preference is None:
- preference = 0
- if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
- preference -= 0.5
-
- protocol = f.get('protocol') or determine_protocol(f)
- proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
-
- if f.get('vcodec') == 'none': # audio only
- preference -= 50
- if self._downloader.params.get('prefer_free_formats'):
- ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
+ class FormatSort:
+ regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<seperator>[~:])(?P<limit>.*?))?)? *$'
+
+ default = ('hidden', 'has_video', 'has_audio', 'extractor', 'lang', 'quality',
+ 'tbr', 'filesize', 'vbr', 'height', 'width', 'protocol', 'vext',
+ 'abr', 'aext', 'fps', 'filesize_approx', 'source_preference', 'format_id')
+
+ settings = {
+ 'vcodec': {'type': 'ordered', 'regex': True,
+ 'order': ['av01', 'vp9', '(h265|he?vc?)', '(h264|avc)', 'vp8', '(mp4v|h263)', 'theora', '', None, 'none']},
+ 'acodec': {'type': 'ordered', 'regex': True,
+ 'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']},
+ 'protocol': {'type': 'ordered', 'regex': True,
+ 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']},
+ 'vext': {'type': 'ordered', 'field': 'video_ext',
+ 'order': ('mp4', 'flv', 'webm', '', 'none'), # Why is flv prefered over webm???
+ 'order_free': ('webm', 'mp4', 'flv', '', 'none')},
+ 'aext': {'type': 'ordered', 'field': 'audio_ext',
+ 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
+ 'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
+ 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
+ 'extractor_preference': {'priority': True, 'type': 'extractor'},
+ 'has_video': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'has_audio': {'priority': True, 'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'language_preference': {'priority': True, 'convert': 'ignore'},
+ 'quality': {'priority': True, 'convert': 'float_none'},
+ 'filesize': {'convert': 'bytes'},
+ 'filesize_approx': {'convert': 'bytes'},
+ 'format_id': {'convert': 'string'},
+ 'height': {'convert': 'float_none'},
+ 'width': {'convert': 'float_none'},
+ 'fps': {'convert': 'float_none'},
+ 'tbr': {'convert': 'float_none'},
+ 'vbr': {'convert': 'float_none'},
+ 'abr': {'convert': 'float_none'},
+ 'asr': {'convert': 'float_none'},
+ 'source_preference': {'convert': 'ignore'},
+ 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
+ 'bitrate': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
+ 'filesize_estimate': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'filesize_approx')},
+ 'extension': {'type': 'combined', 'field': ('vext', 'aext')},
+ 'dimension': {'type': 'multiple', 'field': ('height', 'width'), 'function': min}, # not named as 'resolution' because such a field exists
+ 'res': {'type': 'alias', 'field': 'dimension'},
+ 'ext': {'type': 'alias', 'field': 'extension'},
+ 'br': {'type': 'alias', 'field': 'bitrate'},
+ 'total_bitrate': {'type': 'alias', 'field': 'tbr'},
+ 'video_bitrate': {'type': 'alias', 'field': 'vbr'},
+ 'audio_bitrate': {'type': 'alias', 'field': 'abr'},
+ 'framerate': {'type': 'alias', 'field': 'fps'},
+ 'lang': {'type': 'alias', 'field': 'language_preference'}, # not named as 'language' because such a field exists
+ 'proto': {'type': 'alias', 'field': 'protocol'},
+ 'source': {'type': 'alias', 'field': 'source_preference'},
+ 'size': {'type': 'alias', 'field': 'filesize_estimate'},
+ 'samplerate': {'type': 'alias', 'field': 'asr'},
+ 'video_ext': {'type': 'alias', 'field': 'vext'},
+ 'audio_ext': {'type': 'alias', 'field': 'aext'},
+ 'video_codec': {'type': 'alias', 'field': 'vcodec'},
+ 'audio_codec': {'type': 'alias', 'field': 'acodec'},
+ 'video': {'type': 'alias', 'field': 'has_video'},
+ 'audio': {'type': 'alias', 'field': 'has_audio'},
+ 'extractor': {'type': 'alias', 'field': 'extractor_preference'},
+ 'preference': {'type': 'alias', 'field': 'extractor_preference'}}
+
+ _order = []
+
+ def _get_field_setting(self, field, key):
+ if field not in self.settings:
+ self.settings[field] = {}
+ propObj = self.settings[field]
+ if key not in propObj:
+ type = propObj.get('type')
+ if key == 'field':
+ default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
+ elif key == 'convert':
+ default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
else:
- ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
- ext_preference = 0
- try:
- audio_ext_preference = ORDER.index(f['ext'])
- except ValueError:
- audio_ext_preference = -1
+ default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,), 'function': max}.get(key, None)
+ propObj[key] = default
+ return propObj[key]
+
+ def _resolve_field_value(self, field, value, convertNone=False):
+ if value is None:
+ if not convertNone:
+ return None
+ else:
+ value = value.lower()
+ conversion = self._get_field_setting(field, 'convert')
+ if conversion == 'ignore':
+ return None
+ if conversion == 'string':
+ return value
+ elif conversion == 'float_none':
+ return float_or_none(value)
+ elif conversion == 'bytes':
+ return FileDownloader.parse_bytes(value)
+ elif conversion == 'order':
+ order_free = self._get_field_setting(field, 'order_free')
+ order_list = order_free if order_free and self._use_free_order else self._get_field_setting(field, 'order')
+ use_regex = self._get_field_setting(field, 'regex')
+ list_length = len(order_list)
+ empty_pos = order_list.index('') if '' in order_list else list_length + 1
+ if use_regex and value is not None:
+ for (i, regex) in enumerate(order_list):
+ if regex and re.match(regex, value):
+ return list_length - i
+ return list_length - empty_pos # not in list
+ else: # not regex or value = None
+ return list_length - (order_list.index(value) if value in order_list else empty_pos)
else:
- if f.get('acodec') == 'none': # video only
- preference -= 40
- if self._downloader.params.get('prefer_free_formats'):
- ORDER = ['flv', 'mp4', 'webm']
+ if value.isnumeric():
+ return float(value)
else:
- ORDER = ['webm', 'flv', 'mp4']
- try:
- ext_preference = ORDER.index(f['ext'])
- except ValueError:
- ext_preference = -1
- audio_ext_preference = 0
-
- return (
- preference,
- f.get('language_preference') if f.get('language_preference') is not None else -1,
- f.get('quality') if f.get('quality') is not None else -1,
- f.get('tbr') if f.get('tbr') is not None else -1,
- f.get('filesize') if f.get('filesize') is not None else -1,
- f.get('vbr') if f.get('vbr') is not None else -1,
- f.get('height') if f.get('height') is not None else -1,
- f.get('width') if f.get('width') is not None else -1,
- proto_preference,
- ext_preference,
- f.get('abr') if f.get('abr') is not None else -1,
- audio_ext_preference,
- f.get('fps') if f.get('fps') is not None else -1,
- f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
- f.get('source_preference') if f.get('source_preference') is not None else -1,
- f.get('format_id') if f.get('format_id') is not None else '',
- )
- formats.sort(key=_formats_key)
+ self.settings[field]['convert'] = 'string'
+ return value
+
+ def evaluate_params(self, params, sort_extractor):
+ self._use_free_order = params.get('prefer_free_formats', False)
+ self._sort_user = params.get('format_sort', [])
+ self._sort_extractor = sort_extractor
+
+ def add_item(field, reverse, closest, limit_text):
+ field = field.lower()
+ if field in self._order:
+ return
+ self._order.append(field)
+ limit = self._resolve_field_value(field, limit_text)
+ data = {
+ 'reverse': reverse,
+ 'closest': False if limit is None else closest,
+ 'limit_text': limit_text,
+ 'limit': limit}
+ if field in self.settings:
+ self.settings[field].update(data)
+ else:
+ self.settings[field] = data
+
+ sort_list = (
+ tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
+ + (tuple() if params.get('format_sort_force', False)
+ else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
+ + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
+
+ for item in sort_list:
+ match = re.match(self.regex, item)
+ if match is None:
+ raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
+ field = match.group('field')
+ if field is None:
+ continue
+ if self._get_field_setting(field, 'type') == 'alias':
+ field = self._get_field_setting(field, 'field')
+ reverse = match.group('reverse') is not None
+ closest = match.group('seperator') == '~'
+ limit_text = match.group('limit')
+
+ has_limit = limit_text is not None
+ has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
+ has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
+
+ fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
+ limits = limit_text.split(":") if has_multiple_limits else (limit_text,) if has_limit else tuple()
+ limit_count = len(limits)
+ for (i, f) in enumerate(fields):
+ add_item(f, reverse, closest,
+ limits[i] if i < limit_count
+ else limits[0] if has_limit and not has_multiple_limits
+ else None)
+
+ def print_verbose_info(self, to_screen):
+ to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user))
+ if self._sort_extractor:
+ to_screen('[debug] Sort order given by extractor: %s' % ','.join(self._sort_extractor))
+ to_screen('[debug] Formats sorted by: %s' % ', '.join(['%s%s%s' % (
+ '+' if self._get_field_setting(field, 'reverse') else '', field,
+ '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
+ self._get_field_setting(field, 'limit_text'),
+ self._get_field_setting(field, 'limit'))
+ if self._get_field_setting(field, 'limit_text') is not None else '')
+ for field in self._order if self._get_field_setting(field, 'visible')]))
+
+ def _calculate_field_preference_from_value(self, format, field, type, value):
+ reverse = self._get_field_setting(field, 'reverse')
+ closest = self._get_field_setting(field, 'closest')
+ limit = self._get_field_setting(field, 'limit')
+
+ if type == 'extractor':
+ maximum = self._get_field_setting(field, 'max')
+ if value is None or (maximum is not None and value >= maximum):
+ value = 0
+ elif type == 'boolean':
+ in_list = self._get_field_setting(field, 'in_list')
+ not_in_list = self._get_field_setting(field, 'not_in_list')
+ value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
+ elif type == 'ordered':
+ value = self._resolve_field_value(field, value, True)
+
+ # try to convert to number
+ val_num = float_or_none(value)
+ is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
+ if is_num:
+ value = val_num
+
+ return ((-10, 0) if value is None
+ else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
+ else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
+ else (0, value, 0) if not reverse and (limit is None or value <= limit)
+ else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
+ else (-1, value, 0))
+
+ def _calculate_field_preference(self, format, field):
+ type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
+ get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
+ if type == 'multiple':
+ type = 'field' # Only 'field' is allowed in multiple for now
+ actual_fields = self._get_field_setting(field, 'field')
+
+ def wrapped_function(values):
+ values = tuple(filter(lambda x: x is not None, values))
+ return (self._get_field_setting(field, 'function')(*values) if len(values) > 1
+ else values[0] if values
+ else None)
+
+ value = wrapped_function((get_value(f) for f in actual_fields))
+ else:
+ value = get_value(field)
+ return self._calculate_field_preference_from_value(format, field, type, value)
+
+ def calculate_preference(self, format):
+ # Determine missing protocol
+ if not format.get('protocol'):
+ format['protocol'] = determine_protocol(format)
+
+ # Determine missing ext
+ if not format.get('ext') and 'url' in format:
+ format['ext'] = determine_ext(format['url'])
+ if format.get('vcodec') == 'none':
+ format['audio_ext'] = format['ext']
+ format['video_ext'] = 'none'
+ else:
+ format['video_ext'] = format['ext']
+ format['audio_ext'] = 'none'
+ # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
+ # format['preference'] = -1000
+
+ # Determine missing bitrates
+ if format.get('tbr') is None:
+ if format.get('vbr') is not None and format.get('abr') is not None:
+ format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
+ else:
+ if format.get('vcodec') != "none" and format.get('vbr') is None:
+ format['vbr'] = format.get('tbr') - format.get('abr', 0)
+ if format.get('acodec') != "none" and format.get('abr') is None:
+ format['abr'] = format.get('tbr') - format.get('vbr', 0)
+
+ return tuple(self._calculate_field_preference(format, field) for field in self._order)
+
+ def _sort_formats(self, formats, field_preference=[]):
+ if not formats:
+ raise ExtractorError('No video formats found')
+ format_sort = self.FormatSort() # params and to_screen are taken from the downloader
+ format_sort.evaluate_params(self._downloader.params, field_preference)
+ if self._downloader.params.get('verbose', False):
+ format_sort.print_verbose_info(self._downloader.to_screen)
+ formats.sort(key=lambda f: format_sort.calculate_preference(f))
def _check_formats(self, formats, video_id):
if formats: