jfr.im git - yt-dlp.git/blobdiff - yt_dlp/utils.py
[utils] `is_html`: Handle double BOM
[yt-dlp.git] / yt_dlp / utils.py
index 48a94415db5dccd25a26b5e70fce44ec48e2d055..3b0e6750c379c5461c37b08e5f5e7e6dafbf0453 100644 (file)
@@ -3290,14 +3290,13 @@ def is_html(first_bytes):
         (b'\xff\xfe', 'utf-16-le'),
         (b'\xfe\xff', 'utf-16-be'),
     ]
+
+    encoding = 'utf-8'
     for bom, enc in BOMS:
-        if first_bytes.startswith(bom):
-            s = first_bytes[len(bom):].decode(enc, 'replace')
-            break
-    else:
-        s = first_bytes.decode('utf-8', 'replace')
+        while first_bytes.startswith(bom):
+            encoding, first_bytes = enc, first_bytes[len(bom):]
 
-    return re.match(r'^\s*<', s)
+    return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
 
 
 def determine_protocol(info_dict):