(?:\|(?P<default>.*?))?
)$''')
+ def _from_user_input(field):
+     """Convert one textual path component into a traversal key.
+
+     Returns ``...`` for a bare ``':'``, a ``slice`` for any other component
+     containing ``':'`` (each part is passed through ``int_or_none``), an
+     ``int`` when ``int_or_none`` accepts the component, and the component
+     itself otherwise.
+     """
+     # A lone colon is mapped to Ellipsis
+     if field == ':':
+         return ...
+     # Colon-separated parts become the arguments of slice()
+     if ':' in field:
+         bounds = map(int_or_none, field.split(':'))
+         return slice(*bounds)
+     # Anything int_or_none rejects is kept as a plain string key
+     if int_or_none(field) is None:
+         return field
+     return int(field)
+
def _traverse_infodict(fields):
+ # Resolve a dotted field path against info_dict, which is not a parameter
+ # here and is taken from the enclosing scope.
+ # First split the path into components: each '{...}' group is kept whole,
+ # everything else is split on '.'
fields = [f for x in re.split(r'\.({.+?})\.?', fields)
for f in ([x] if x.startswith('{') else x.split('.'))]
for i, f in enumerate(fields):
if not f.startswith('{'):
+ # Plain component: convert it to a key via _from_user_input
+ fields[i] = _from_user_input(f)
continue
+ # NOTE(review): assert statements are stripped under `python -O`
assert f.endswith('}'), f'No closing brace for {f} in {fields}'
- fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
+ # Brace group: map each comma-separated entry to its own converted sub-path
+ fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
- return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
+ # Components are already converted above, so is_user_input is no longer passed
+ return traverse_obj(info_dict, fields, traverse_string=True)
def get_value(mdict):
# Object traversal
return selector_function(ctx_copy)
return final_selector
- stream = io.BytesIO(format_spec.encode())
+ # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
+ # Prefix numbers with random letters to avoid it being classified as a number
+ # See: https://github.com/yt-dlp/yt-dlp/pull/8797
+ # TODO: Implement parser not reliant on tokenize.tokenize
+ prefix = ''.join(random.choices(string.ascii_letters, k=32))
+ stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
try:
- tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
+ tokens = list(_remove_unused_ops(
+ token._replace(string=token.string.replace(prefix, ''))
+ for token in tokenize.tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))