jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/generic.py
[ie/generic] Add `key_query` extractor-arg
[yt-dlp.git] / yt_dlp / extractor / generic.py
index 33e71d1c572989c5ef90454ea6c2f457422b71f4..3b8e1e957cc861625ec08ef9327f9270a2402e74 100644 (file)
@@ -4,7 +4,7 @@
 import urllib.parse
 import xml.etree.ElementTree
 
-from .common import InfoExtractor  # isort: split
+from .common import InfoExtractor
 from .commonprotocols import RtmpIE
 from .youtube import YoutubeIE
 from ..compat import compat_etree_fromstring
@@ -17,6 +17,7 @@
     determine_protocol,
     dict_get,
     extract_basic_auth,
+    filter_dict,
     format_field,
     int_or_none,
     is_html,
@@ -35,6 +36,7 @@
     unsmuggle_url,
     update_url_query,
     url_or_none,
+    urlhandle_detect_ext,
     urljoin,
     variadic,
     xpath_attr,
@@ -58,7 +60,9 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'trailer',
                 'upload_date': '20100513',
-            }
+                'direct': True,
+                'timestamp': 1273772943.0,
+            },
         },
         # Direct link to media delivered compressed (until Accept-Encoding is *)
         {
@@ -71,7 +75,7 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20140522',
             },
             'expected_warnings': [
-                'URL could be a direct video link, returning it as such.'
+                'URL could be a direct video link, returning it as such.',
             ],
             'skip': 'URL invalid',
         },
@@ -101,10 +105,12 @@ class GenericIE(InfoExtractor):
                 'ext': 'webm',
                 'title': '5_Lennart_Poettering_-_Systemd',
                 'upload_date': '20141120',
+                'direct': True,
+                'timestamp': 1416498816.0,
             },
             'expected_warnings': [
-                'URL could be a direct video link, returning it as such.'
-            ]
+                'URL could be a direct video link, returning it as such.',
+            ],
         },
         # RSS feed
         {
@@ -112,7 +118,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': 'https://phihag.de/2014/youtube-dl/rss2.xml',
                 'title': 'Zero Punctuation',
-                'description': 're:.*groundbreaking video review series.*'
+                'description': 're:.*groundbreaking video review series.*',
             },
             'playlist_mincount': 11,
         },
@@ -133,6 +139,7 @@ class GenericIE(InfoExtractor):
                     'upload_date': '20201204',
                 },
             }],
+            'skip': 'Dead link',
         },
         # RSS feed with item with description and thumbnails
         {
@@ -145,12 +152,12 @@ class GenericIE(InfoExtractor):
             'playlist': [{
                 'info_dict': {
                     'ext': 'm4a',
-                    'id': 'c1c879525ce2cb640b344507e682c36d',
+                    'id': '818a5d38-01cd-152f-2231-ee479677fa82',
                     'title': 're:Hydrogen!',
                     'description': 're:.*In this episode we are going.*',
                     'timestamp': 1567977776,
                     'upload_date': '20190908',
-                    'duration': 459,
+                    'duration': 423,
                     'thumbnail': r're:^https?://.*\.jpg$',
                     'episode_number': 1,
                     'season_number': 1,
@@ -267,6 +274,7 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': '404 Not Found',
         },
         # MPD from http://dash-mse-test.appspot.com/media.html
         {
@@ -278,6 +286,7 @@ class GenericIE(InfoExtractor):
                 'title': 'car-20120827-manifest',
                 'formats': 'mincount:9',
                 'upload_date': '20130904',
+                'timestamp': 1378272859.0,
             },
         },
         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
@@ -318,14 +327,14 @@ class GenericIE(InfoExtractor):
                 'id': 'cmQHVoWB5FY',
                 'ext': 'mp4',
                 'upload_date': '20130224',
-                'uploader_id': 'TheVerge',
+                'uploader_id': '@TheVerge',
                 'description': r're:^Chris Ziegler takes a look at the\.*',
                 'uploader': 'The Verge',
                 'title': 'First Firefox OS phones side-by-side',
             },
             'params': {
                 'skip_download': False,
-            }
+            },
         },
         {
             # redirect in Refresh HTTP header
@@ -351,7 +360,7 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'uploader': 'www.hodiho.fr',
                 'title': 'R\u00e9gis plante sa Jeep',
-            }
+            },
         },
         # bandcamp page with custom domain
         {
@@ -365,46 +374,6 @@ class GenericIE(InfoExtractor):
             },
             'skip': 'There is a limit of 200 free downloads / month for the test song',
         },
-        # ooyala video
-        {
-            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
-            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
-            'info_dict': {
-                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
-                'ext': 'mp4',
-                'title': '2cc213299525360.mov',  # that's what we get
-                'duration': 238.231,
-            },
-            'add_ie': ['Ooyala'],
-        },
-        {
-            # ooyala video embedded with http://player.ooyala.com/iframe.js
-            'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
-            'info_dict': {
-                'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
-                'ext': 'mp4',
-                'title': '"Steve Jobs: Man in the Machine" trailer',
-                'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
-                'duration': 135.427,
-            },
-            'params': {
-                'skip_download': True,
-            },
-            'skip': 'movie expired',
-        },
-        # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
-        {
-            'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
-            'info_dict': {
-                'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
-                'ext': 'mp4',
-                'title': 'Steampunk Fest Comes to Honesdale',
-                'duration': 43.276,
-            },
-            'params': {
-                'skip_download': True,
-            }
-        },
         # embed.ly video
         {
             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -469,19 +438,19 @@ class GenericIE(InfoExtractor):
                     'id': '370908',
                     'title': 'Госзаказ. День 3',
                     'ext': 'mp4',
-                }
+                },
             }, {
                 'info_dict': {
                     'id': '370905',
                     'title': 'Госзаказ. День 2',
                     'ext': 'mp4',
-                }
+                },
             }, {
                 'info_dict': {
                     'id': '370902',
                     'title': 'Госзаказ. День 1',
                     'ext': 'mp4',
-                }
+                },
             }],
             'params': {
                 # m3u8 download
@@ -497,7 +466,8 @@ class GenericIE(InfoExtractor):
                 'title': 'Ужастики, русский трейлер (2015)',
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'duration': 153,
-            }
+            },
+            'skip': 'Site dead',
         },
         # XHamster embed
         {
@@ -521,7 +491,7 @@ class GenericIE(InfoExtractor):
                 'title': 'Hidden miracles of the natural world',
                 'uploader': 'Louie Schwartzberg',
                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
-            }
+            },
         },
         # nowvideo embed hidden behind percent encoding
         {
@@ -546,7 +516,7 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20140320',
             },
             'params': {
-                'skip_download': 'Requires rtmpdump'
+                'skip_download': 'Requires rtmpdump',
             },
             'skip': 'video gone',
         },
@@ -567,8 +537,8 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
             'expected_warnings': [
-                'Forbidden'
-            ]
+                'Forbidden',
+            ],
         },
         # Condé Nast embed
         {
@@ -578,7 +548,7 @@ class GenericIE(InfoExtractor):
                 'id': '53501be369702d3275860000',
                 'ext': 'mp4',
                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
-            }
+            },
         },
         # Dailymotion embed
         {
@@ -625,7 +595,7 @@ class GenericIE(InfoExtractor):
             'add_ie': ['Youtube'],
             'params': {
                 'skip_download': True,
-            }
+            },
         },
         # MTVServices embed
         {
@@ -654,7 +624,7 @@ class GenericIE(InfoExtractor):
             },
             'params': {
                 'skip_download': True,
-            }
+            },
         },
         # Flowplayer
         {
@@ -666,7 +636,7 @@ class GenericIE(InfoExtractor):
                 'age_limit': 18,
                 'uploader': 'www.handjobhub.com',
                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
-            }
+            },
         },
         # MLB embed
         {
@@ -710,7 +680,7 @@ class GenericIE(InfoExtractor):
                 'uploader': 'Sophos Security',
                 'title': 'Chet Chat 171 - Oct 29, 2014',
                 'upload_date': '20141029',
-            }
+            },
         },
         # Soundcloud multiple embeds
         {
@@ -744,7 +714,7 @@ class GenericIE(InfoExtractor):
                 'ext': 'flv',
                 'upload_date': '20141112',
                 'title': 'Rosetta #CometLanding webcast HL 10',
-            }
+            },
         },
         # Another Livestream embed, without 'new.' in URL
         {
@@ -769,15 +739,17 @@ class GenericIE(InfoExtractor):
             'playlist_mincount': 1,
             'add_ie': ['Youtube'],
         },
-        # Cinchcast embed
+        # Libsyn embed
         {
             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
             'info_dict': {
-                'id': '7141703',
+                'id': '3793998',
                 'ext': 'mp3',
                 'upload_date': '20141126',
-                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
-            }
+                'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing',
+                'thumbnail': 'https://assets.libsyn.com/secure/item/3793998/?height=90&width=90',
+                'duration': 3989.0,
+            },
         },
         # Cinerama player
         {
@@ -787,7 +759,7 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'uploader': 'www.abc.net.au',
                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
-            }
+            },
         },
         # embedded viddler video
         {
@@ -904,7 +876,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': 'aanslagen-kopenhagen',
                 'title': 'Aanslagen Kopenhagen',
-            }
+            },
         },
         # Zapiks embed
         {
@@ -913,7 +885,7 @@ class GenericIE(InfoExtractor):
                 'id': '118046',
                 'ext': 'mp4',
                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
-            }
+            },
         },
         # Kaltura embed (different embed code)
         {
@@ -952,11 +924,11 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
             'expected_warnings': [
-                'Could not send HEAD request'
+                'Could not send HEAD request',
             ],
             'params': {
                 'skip_download': True,
-            }
+            },
         },
         {
             # Kaltura embedded, some fileExt broken (#11480)
@@ -1083,7 +1055,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '8RUoRhRi',
                 'ext': 'mp4',
-                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+                'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
                 'timestamp': 1428207000,
                 'upload_date': '20150405',
@@ -1159,7 +1131,7 @@ class GenericIE(InfoExtractor):
                 'uploader': 'clickhole',
                 'upload_date': '20150527',
                 'timestamp': 1432744860,
-            }
+            },
         },
         # SnagFilms embed
         {
@@ -1168,7 +1140,7 @@ class GenericIE(InfoExtractor):
                 'id': '74849a00-85a9-11e1-9660-123139220831',
                 'ext': 'mp4',
                 'title': '#whilewewatch',
-            }
+            },
         },
         # AdobeTVVideo embed
         {
@@ -1464,7 +1436,7 @@ class GenericIE(InfoExtractor):
                     'upload_date': '20211217',
                     'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg',
                 },
-            }]
+            }],
         },
         {
             'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
@@ -1558,16 +1530,6 @@ class GenericIE(InfoExtractor):
                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
             },
         },
-        {
-            # vzaar embed
-            'url': 'http://help.vzaar.com/article/165-embedding-video',
-            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
-            'info_dict': {
-                'id': '8707641',
-                'ext': 'mp4',
-                'title': 'Building A Business Online: Principal Chairs Q & A',
-            },
-        },
         {
             # multiple HTML5 videos on one page
             'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
@@ -1585,7 +1547,7 @@ class GenericIE(InfoExtractor):
                 'id': '0f64ce6',
                 'title': 'vl14062007715967',
                 'ext': 'mp4',
-            }
+            },
         },
         {
             'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
@@ -1597,7 +1559,7 @@ class GenericIE(InfoExtractor):
                 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
                 'timestamp': 1474354800,
                 'upload_date': '20160920',
-            }
+            },
         },
         {
             'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
@@ -1689,7 +1651,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '83645793',
                 'title': 'Lock up and get excited',
-                'ext': 'mp4'
+                'ext': 'mp4',
             },
             'skip': 'TODO: fix nested playlists processing in tests',
         },
@@ -1765,7 +1727,7 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20220110',
                 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
 
-            }
+            },
         },
         {
             # blogger embed
@@ -1935,8 +1897,8 @@ class GenericIE(InfoExtractor):
                 'timestamp': 1501941939.0,
                 'title': 'That small heart attack.',
                 'upload_date': '20170805',
-                'uploader': 'Antw87'
-            }
+                'uploader': 'Antw87',
+            },
         },
         {
             # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
@@ -1948,8 +1910,8 @@ class GenericIE(InfoExtractor):
                 'title': "The game Didn't want me to Knife that Guy I guess",
                 'uploader': 'paraf1ve',
                 'timestamp': 1636788683.0,
-                'upload_date': '20211113'
-            }
+                'upload_date': '20211113',
+            },
         },
         {
             # MainStreaming player
@@ -1961,15 +1923,15 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'live_status': 'not_live',
                 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
-                'duration': 1512
-            }
+                'duration': 1512,
+            },
         },
         {
             # Multiple gfycat iframe embeds
             'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
             'info_dict': {
                 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
-                'id': 'board'
+                'id': 'board',
             },
             'playlist_count': 8,
         },
@@ -1978,18 +1940,18 @@ class GenericIE(InfoExtractor):
             'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
             'info_dict': {
                 'title': '옳게 된 크롭 니트 스테이씨 아이사',
-                'id': 'board'
+                'id': 'board',
             },
-            'playlist_count': 6
+            'playlist_count': 6,
         },
         {
             # Multiple gfycat embeds, with uppercase "IFR" in urls
             'url': 'https://kkzz.kr/?vid=2295',
             'info_dict': {
                 'title': '지방시 앰버서더 에스파 카리나 움짤',
-                'id': '?vid=2295'
+                'id': '?vid=2295',
             },
-            'playlist_count': 9
+            'playlist_count': 9,
         },
         {
             # Panopto embeds
@@ -2022,9 +1984,9 @@ class GenericIE(InfoExtractor):
             'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html',
             'info_dict': {
                 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä',
-                'id': 'art-2000008762560'
+                'id': 'art-2000008762560',
             },
-            'playlist_count': 3
+            'playlist_count': 3,
         },
         {
             # Ruutu embed in hs.fi with a single video
@@ -2053,7 +2015,7 @@ class GenericIE(InfoExtractor):
                 'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
                 'timestamp': 1652833414,
                 'age_limit': 0,
-            }
+            },
         },
         {
             'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
@@ -2093,7 +2055,7 @@ class GenericIE(InfoExtractor):
                 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
                 'duration': 5688.0,
                 'upload_date': '20210111',
-            }
+            },
         },
         {
             'note': 'JSON LD with multiple @type',
@@ -2109,7 +2071,7 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20200411',
                 'age_limit': 0,
                 'duration': 111.0,
-            }
+            },
         },
         {
             'note': 'JSON LD with unexpected data type',
@@ -2124,7 +2086,7 @@ class GenericIE(InfoExtractor):
                 'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$',
                 'age_limit': 0,
                 'direct': True,
-            }
+            },
         },
         {
             'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.',
@@ -2142,22 +2104,6 @@ class GenericIE(InfoExtractor):
                 'age_limit': 0,
             },
         },
-        {
-            'note': 'JW Player embed with unicode-escape sequences in URL',
-            'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
-            'info_dict': {
-                'id': 'm',
-                'ext': 'mp4',
-                'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
-                'description': 'Mahler\'s ',
-                'uploader': 'www.medici.tv',
-                'age_limit': 0,
-                'thumbnail': r're:^https?://.+\.jpg',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
         {
             'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
             'md5': 'e2f0a4c329f7986280b7328e24036d60',
@@ -2202,7 +2148,7 @@ class GenericIE(InfoExtractor):
 
     def report_following_redirect(self, new_url):
         """Report information extraction."""
-        self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
+        self._downloader.to_screen(f'[redirect] Following redirect to {new_url}')
 
     def report_detected(self, name, num=1, note=None):
         if num > 1:
@@ -2221,7 +2167,15 @@ def _extra_manifest_info(self, info, manifest_url):
                 urllib.parse.urlparse(fragment_query).query or fragment_query
                 or urllib.parse.urlparse(manifest_url).query or None)
 
-        hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
+        key_query = self._configuration_arg('key_query', [None], casesense=True)[0]
+        if key_query is not None:
+            info['extra_param_to_key_url'] = (
+                urllib.parse.urlparse(key_query).query or key_query
+                or urllib.parse.urlparse(manifest_url).query or None)
+
+        def hex_or_none(value):
+            return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None
+
         info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
             'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
         }) or None
@@ -2298,29 +2252,29 @@ def _kvs_get_real_url(cls, video_url, license_code):
             return video_url  # not obfuscated
 
         parsed = urllib.parse.urlparse(video_url[len('function/0/'):])
-        license = cls._kvs_get_license_token(license_code)
+        license_token = cls._kvs_get_license_token(license_code)
         urlparts = parsed.path.split('/')
 
         HASH_LENGTH = 32
-        hash = urlparts[3][:HASH_LENGTH]
+        hash_ = urlparts[3][:HASH_LENGTH]
         indices = list(range(HASH_LENGTH))
 
         # Swap indices of hash according to the destination calculated from the license token
         accum = 0
         for src in reversed(range(HASH_LENGTH)):
-            accum += license[src]
+            accum += license_token[src]
             dest = (src + accum) % HASH_LENGTH
             indices[src], indices[dest] = indices[dest], indices[src]
 
-        urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:]
+        urlparts[3] = ''.join(hash_[index] for index in indices) + urlparts[3][HASH_LENGTH:]
         return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts)))
 
     @staticmethod
-    def _kvs_get_license_token(license):
-        license = license.replace('$', '')
-        license_values = [int(char) for char in license]
+    def _kvs_get_license_token(license_code):
+        license_code = license_code.replace('$', '')
+        license_values = [int(char) for char in license_code]
 
-        modlicense = license.replace('0', '1')
+        modlicense = license_code.replace('0', '1')
         center = len(modlicense) // 2
         fronthalf = int(modlicense[:center + 1])
         backhalf = int(modlicense[center:])
@@ -2392,18 +2346,17 @@ def _real_extract(self, url):
                     if default_search == 'auto_warning':
                         if re.match(r'^(?:url|URL)$', url):
                             raise ExtractorError(
-                                'Invalid URL:  %r . Call yt-dlp like this:  yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
+                                f'Invalid URL:  {url!r} . Call yt-dlp like this:  yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ',
                                 expected=True)
                         else:
                             self.report_warning(
-                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
+                                f'Falling back to youtube search for  {url} . Set --default-search "auto" to suppress this warning.')
                     return self.url_result('ytsearch:' + url)
 
             if default_search in ('error', 'fixup_error'):
                 raise ExtractorError(
-                    '%r is not a valid URL. '
-                    'Set --default-search "ytsearch" (or run  yt-dlp "ytsearch:%s" ) to search YouTube'
-                    % (url, url), expected=True)
+                    f'{url!r} is not a valid URL. '
+                    f'Set --default-search "ytsearch" (or run  yt-dlp "ytsearch:{url}" ) to search YouTube', expected=True)
             else:
                 if ':' not in default_search:
                     default_search += ':'
@@ -2427,12 +2380,11 @@ def _real_extract(self, url):
         # to accept raw bytes and being able to download only a chunk.
         # It may probably better to solve this by checking Content-Type for application/octet-stream
         # after a HEAD request, but not sure if we can rely on this.
-        full_response = self._request_webpage(url, video_id, headers={
+        full_response = self._request_webpage(url, video_id, headers=filter_dict({
             'Accept-Encoding': 'identity',
-            **smuggled_data.get('http_headers', {})
-        })
+            'Referer': smuggled_data.get('referer'),
+        }))
         new_url = full_response.url
-        url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
         if new_url != extract_basic_auth(url)[0]:
             self.report_following_redirect(new_url)
             if force_videoid:
@@ -2442,7 +2394,7 @@ def _real_extract(self, url):
         info_dict = {
             'id': video_id,
             'title': self._generic_title(url),
-            'timestamp': unified_timestamp(full_response.headers.get('Last-Modified'))
+            'timestamp': unified_timestamp(full_response.headers.get('Last-Modified')),
         }
 
         # Check for direct link to a video
@@ -2450,13 +2402,13 @@ def _real_extract(self, url):
         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
             self.report_detected('direct video link')
-            headers = smuggled_data.get('http_headers', {})
+            headers = filter_dict({'Referer': smuggled_data.get('referer')})
             format_id = str(m.group('format_id'))
-            ext = determine_ext(url)
+            ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
             subtitles = {}
             if format_id.endswith('mpegurl') or ext == 'm3u8':
                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
-            elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
+            elif format_id.endswith(('mpd', 'dash+xml')) or ext == 'mpd':
                 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
             elif format_id == 'f4m' or ext == 'f4m':
                 formats = self._extract_f4m_formats(url, video_id, headers=headers)
@@ -2464,7 +2416,8 @@ def _real_extract(self, url):
                 formats = [{
                     'format_id': format_id,
                     'url': url,
-                    'vcodec': 'none' if m.group('type') == 'audio' else None
+                    'ext': ext,
+                    'vcodec': 'none' if m.group('type') == 'audio' else None,
                 }]
                 info_dict['direct'] = True
             info_dict.update({
@@ -2512,7 +2465,7 @@ def _real_extract(self, url):
             try:
                 doc = compat_etree_fromstring(webpage)
             except xml.etree.ElementTree.ParseError:
-                doc = compat_etree_fromstring(webpage.encode('utf-8'))
+                doc = compat_etree_fromstring(webpage.encode())
             if doc.tag == 'rss':
                 self.report_detected('RSS feed')
                 return self._extract_rss(url, video_id, doc)
@@ -2660,7 +2613,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
                     })
             # https://docs.videojs.com/player#addRemoteTextTrack
             # https://html.spec.whatwg.org/multipage/media.html#htmltrackelement
-            for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
+            for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
                 sub = self._parse_json(
                     sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
                 sub_src = str_or_none(sub.get('src'))
@@ -2701,7 +2654,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
                 'url': smuggle_url(json_ld['url'], {
                     'force_videoid': video_id,
                     'to_generic': True,
-                    'http_headers': {'Referer': url},
+                    'referer': url,
                 }),
             }, json_ld)]
 
@@ -2782,7 +2735,7 @@ def filter_video(urls):
             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
-                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+                rf'(?:[a-z-]+="[^"]+"\s+)*?content="{REDIRECT_REGEX}',
                 webpage)
             if not found:
                 # Look also in Refresh HTTP header
@@ -2826,7 +2779,7 @@ def filter_video(urls):
 
             video_id = os.path.splitext(video_id)[0]
             headers = {
-                'referer': actual_url
+                'referer': actual_url,
             }
 
             entry_info_dict = {
@@ -2883,5 +2836,5 @@ def filter_video(urls):
             for num, e in enumerate(entries, start=1):
                 # 'url' results don't have a title
                 if e.get('title') is not None:
-                    e['title'] = '%s (%d)' % (e['title'], num)
+                    e['title'] = '{} ({})'.format(e['title'], num)
         return entries