[core] Fix format selection parse error for CPython 3.12 (#8797)

author Simon Sawicki <redacted>

Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)

committer GitHub <redacted>

Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)
author Simon Sawicki <redacted>
Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)
committer GitHub <redacted>
Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py

index 48c710e00c75de1c09cafb52d5e2ce685ad36092..8bff08314596a51cb75ea08f949e1ccfcc357071 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -140,6 +140,8 @@ def test(inp, *expected, multi=False):
          test('example-with-dashes', 'example-with-dashes')
          test('all', '2', '47', '45', 'example-with-dashes', '35')
          test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
+        # See: https://github.com/yt-dlp/yt-dlp/pull/8797
+        test('7_a/worst', '35')
  
      def test_format_selection_audio(self):
          formats = [
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 0c07866e4992312b6e0e53b1bb579c0696302b86..5e28fd0e21c00c6d1b1d563ea5cf38e5a0b05545 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2465,9 +2465,16 @@ def final_selector(ctx):
                  return selector_function(ctx_copy)
              return final_selector
  
-        stream = io.BytesIO(format_spec.encode())
+        # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
+        #       Prefix numbers with random letters to avoid them being classified as numbers
+        #       See: https://github.com/yt-dlp/yt-dlp/pull/8797
+        # TODO: Implement parser not reliant on tokenize.tokenize
+        prefix = ''.join(random.choices(string.ascii_letters, k=32))
+        stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
          try:
-            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
+            tokens = list(_remove_unused_ops(
+                token._replace(string=token.string.replace(prefix, ''))
+                for token in tokenize.tokenize(stream.readline)))
          except tokenize.TokenError:
              raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
author	Simon Sawicki <redacted>
	Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)
committer	GitHub <redacted>
	Sun, 24 Dec 2023 21:09:01 +0000 (22:09 +0100)
test/test_YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/YoutubeDL.py		patch \| blob \| blame \| history