jfr.im git - yt-dlp.git/commitdiff
[core] Fix format selection parse error for CPython 3.12 (#8797)
author Simon Sawicki <redacted>
Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)
committer GitHub <redacted>
Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)
Authored by: Grub4K

test/test_YoutubeDL.py
yt_dlp/YoutubeDL.py

index 48c710e00c75de1c09cafb52d5e2ce685ad36092..8bff08314596a51cb75ea08f949e1ccfcc357071 100644 (file)
@@ -140,6 +140,8 @@ def test(inp, *expected, multi=False):
         test('example-with-dashes', 'example-with-dashes')
         test('all', '2', '47', '45', 'example-with-dashes', '35')
         test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
+        # See: https://github.com/yt-dlp/yt-dlp/pull/8797
+        test('7_a/worst', '35')
 
     def test_format_selection_audio(self):
         formats = [
index 0c07866e4992312b6e0e53b1bb579c0696302b86..5e28fd0e21c00c6d1b1d563ea5cf38e5a0b05545 100644 (file)
@@ -2465,9 +2465,16 @@ def final_selector(ctx):
                 return selector_function(ctx_copy)
             return final_selector
 
-        stream = io.BytesIO(format_spec.encode())
+        # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
+        #       Prefix numbers with random letters to avoid them being classified as numbers
+        #       See: https://github.com/yt-dlp/yt-dlp/pull/8797
+        # TODO: Implement a parser that does not rely on tokenize.tokenize
+        prefix = ''.join(random.choices(string.ascii_letters, k=32))
+        stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
         try:
-            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
+            tokens = list(_remove_unused_ops(
+                token._replace(string=token.string.replace(prefix, ''))
+                for token in tokenize.tokenize(stream.readline)))
         except tokenize.TokenError:
             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))