[ie/mlbtv] Fix extraction (#10296)

[yt-dlp.git] / yt_dlp / extractor / common.py
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index fe08839aaa6c484e91c4b728c040685ebc824adf..f63bd782586b2f72d96cc7bee4476c3ab12e280d 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,5 +1,6 @@
  import base64
  import collections
  import base64
  import collections
+import functools
  import getpass
  import hashlib
  import http.client
  import getpass
  import hashlib
  import http.client
@@ -17,16 +18,26 @@
  import sys
  import time
  import types
  import sys
  import time
  import types
-import urllib.error
  import urllib.parse
  import urllib.request
  import xml.etree.ElementTree
  
  import urllib.parse
  import urllib.request
  import xml.etree.ElementTree
  
-from ..compat import functools  # isort: split
-from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
+from ..compat import (
+    compat_etree_fromstring,
+    compat_expanduser,
+    compat_os_name,
+    urllib_req_to_req,
+)
  from ..cookies import LenientSimpleCookie
  from ..downloader.f4m import get_base_url, remove_encrypted_media
  from ..downloader.hls import HlsFD
  from ..cookies import LenientSimpleCookie
  from ..downloader.f4m import get_base_url, remove_encrypted_media
  from ..downloader.hls import HlsFD
+from ..networking import HEADRequest, Request
+from ..networking.exceptions import (
+    HTTPError,
+    IncompleteRead,
+    network_exceptions,
+)
+from ..networking.impersonate import ImpersonateTarget
  from ..utils import (
      IDENTITY,
      JSON_LD_RE,
  from ..utils import (
      IDENTITY,
      JSON_LD_RE,
@@ -35,7 +46,6 @@
      FormatSorter,
      GeoRestrictedError,
      GeoUtils,
      FormatSorter,
      GeoRestrictedError,
      GeoUtils,
-    HEADRequest,
      LenientJSONDecoder,
      Popen,
      RegexNotFoundError,
      LenientJSONDecoder,
      Popen,
      RegexNotFoundError,
@@ -50,7 +60,6 @@
      determine_ext,
      dict_get,
      encode_data_uri,
      determine_ext,
      dict_get,
      encode_data_uri,
-    error_to_compat_str,
      extract_attributes,
      filter_dict,
      fix_xml_ampersands,
      extract_attributes,
      filter_dict,
      fix_xml_ampersands,
@@ -61,7 +70,6 @@
      js_to_json,
      mimetype2ext,
      netrc_from_content,
      js_to_json,
      mimetype2ext,
      netrc_from_content,
-    network_exceptions,
      orderedSet,
      parse_bitrate,
      parse_codecs,
      orderedSet,
      parse_bitrate,
      parse_codecs,
@@ -71,7 +79,6 @@
      parse_resolution,
      sanitize_filename,
      sanitize_url,
      parse_resolution,
      sanitize_filename,
      sanitize_url,
-    sanitized_Request,
      smuggle_url,
      str_or_none,
      str_to_int,
      smuggle_url,
      str_or_none,
      str_to_int,
@@ -83,8 +90,6 @@
      unescapeHTML,
      unified_strdate,
      unified_timestamp,
      unescapeHTML,
      unified_strdate,
      unified_timestamp,
-    update_Request,
-    update_url_query,
      url_basename,
      url_or_none,
      urlhandle_detect_ext,
      url_basename,
      url_or_none,
      urlhandle_detect_ext,
@@ -165,12 +170,12 @@ class InfoExtractor:
                                   Automatically calculated from width and height
                      * dynamic_range The dynamic range of the video. One of:
                                   "SDR" (None), "HDR10", "HDR10+", "HDR12", "HLG", "DV"
                                   Automatically calculated from width and height
                      * dynamic_range The dynamic range of the video. One of:
                                   "SDR" (None), "HDR10", "HDR10+", "HDR12", "HLG", "DV"
-                    * tbr        Average bitrate of audio and video in KBit/s
-                    * abr        Average audio bitrate in KBit/s
+                    * tbr        Average bitrate of audio and video in kbps (1000 bits/sec)
+                    * abr        Average audio bitrate in kbps (1000 bits/sec)
                      * acodec     Name of the audio codec in use
                      * asr        Audio sampling rate in Hertz
                      * audio_channels  Number of audio channels
                      * acodec     Name of the audio codec in use
                      * asr        Audio sampling rate in Hertz
                      * audio_channels  Number of audio channels
-                    * vbr        Average video bitrate in KBit/s
+                    * vbr        Average video bitrate in kbps (1000 bits/sec)
                      * fps        Frame rate
                      * vcodec     Name of the video codec in use
                      * container  Name of the container format
                      * fps        Frame rate
                      * vcodec     Name of the video codec in use
                      * container  Name of the container format
@@ -229,7 +234,14 @@ class InfoExtractor:
                                   'maybe' if the format may have DRM and has to be tested before download.
                      * extra_param_to_segment_url  A query string to append to each
                                   fragment's URL, or to update each existing query string
                                   'maybe' if the format may have DRM and has to be tested before download.
                      * extra_param_to_segment_url  A query string to append to each
                                   fragment's URL, or to update each existing query string
-                                 with. Only applied by the native HLS/DASH downloaders.
+                                 with. If it is an HLS stream with an AES-128 decryption key,
+                                 the query parameters will be passed to the key URI as well,
+                                 unless there is an `extra_param_to_key_url` given,
+                                 or unless an external key URI is provided via `hls_aes`.
+                                 Only applied by the native HLS/DASH downloaders.
+                    * extra_param_to_key_url  A query string to append to the URL
+                                 of the format's HLS AES-128 decryption key.
+                                 Only applied by the native HLS downloader.
                      * hls_aes    A dictionary of HLS AES-128 decryption information
                                   used by the native HLS downloader to override the
                                   values in the media playlist when an '#EXT-X-KEY' tag
                      * hls_aes    A dictionary of HLS AES-128 decryption information
                                   used by the native HLS downloader to override the
                                   values in the media playlist when an '#EXT-X-KEY' tag
@@ -241,7 +253,10 @@ class InfoExtractor:
                      * downloader_options  A dictionary of downloader options
                                   (For internal use only)
                                   * http_chunk_size Chunk size for HTTP downloads
                      * downloader_options  A dictionary of downloader options
                                   (For internal use only)
                                   * http_chunk_size Chunk size for HTTP downloads
-                                 * ffmpeg_args     Extra arguments for ffmpeg downloader
+                                 * ffmpeg_args     Extra arguments for ffmpeg downloader (input)
+                                 * ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
+                    * is_dash_periods  Whether the format is a result of merging
+                                 multiple DASH periods.
                      RTMP formats can also have the additional fields: page_url,
                      app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
                      rtmp_protocol, rtmp_real_time
                      RTMP formats can also have the additional fields: page_url,
                      app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
                      rtmp_protocol, rtmp_real_time
@@ -255,7 +270,7 @@ class InfoExtractor:
  
      direct:         True if a direct video file was given (must only be set by GenericIE)
      alt_title:      A secondary title of the video.
  
      direct:         True if a direct video file was given (must only be set by GenericIE)
      alt_title:      A secondary title of the video.
-    display_id      An alternative identifier for the video, not necessarily
+    display_id:     An alternative identifier for the video, not necessarily
                      unique, but available before title. Typically, id is
                      something like "4234987", title "Dancing naked mole rats",
                      and display_id "dancing-naked-mole-rats"
                      unique, but available before title. Typically, id is
                      something like "4234987", title "Dancing naked mole rats",
                      and display_id "dancing-naked-mole-rats"
@@ -273,7 +288,7 @@ class InfoExtractor:
      description:    Full video description.
      uploader:       Full name of the video uploader.
      license:        License name the video is licensed under.
      description:    Full video description.
      uploader:       Full name of the video uploader.
      license:        License name the video is licensed under.
-    creator:        The creator of the video.
+    creators:       List of creators of the video.
      timestamp:      UNIX timestamp of the moment the video was uploaded
      upload_date:    Video upload date in UTC (YYYYMMDD).
                      If not explicitly set, calculated from timestamp
      timestamp:      UNIX timestamp of the moment the video was uploaded
      upload_date:    Video upload date in UTC (YYYYMMDD).
                      If not explicitly set, calculated from timestamp
@@ -281,6 +296,9 @@ class InfoExtractor:
                      If it is not clear whether to use timestamp or this, use the former
      release_date:   The date (YYYYMMDD) when the video was released in UTC.
                      If not explicitly set, calculated from release_timestamp
                      If it is not clear whether to use timestamp or this, use the former
      release_date:   The date (YYYYMMDD) when the video was released in UTC.
                      If not explicitly set, calculated from release_timestamp
+    release_year:   Year (YYYY) as integer when the video or album was released.
+                    To be used if no exact release date is known.
+                    If not explicitly set, calculated from release_date.
      modified_timestamp: UNIX timestamp of the moment the video was last modified.
      modified_date:   The date (YYYYMMDD) when the video was last modified in UTC.
                      If not explicitly set, calculated from modified_timestamp
      modified_timestamp: UNIX timestamp of the moment the video was last modified.
      modified_date:   The date (YYYYMMDD) when the video was last modified in UTC.
                      If not explicitly set, calculated from modified_timestamp
@@ -374,6 +392,7 @@ class InfoExtractor:
                      'private', 'premium_only', 'subscriber_only', 'needs_auth',
                      'unlisted' or 'public'. Use 'InfoExtractor._availability'
                      to set it
                      'private', 'premium_only', 'subscriber_only', 'needs_auth',
                      'unlisted' or 'public'. Use 'InfoExtractor._availability'
                      to set it
+    media_type:     The type of media as classified by the site, e.g. "episode", "clip", "trailer"
      _old_archive_ids: A list of old archive ids needed for backward compatibility
      _format_sort_fields: A list of fields to use for sorting formats
      __post_extractor: A function to be called just before the metadata is
      _old_archive_ids: A list of old archive ids needed for backward compatibility
      _format_sort_fields: A list of fields to use for sorting formats
      __post_extractor: A function to be called just before the metadata is
@@ -413,17 +432,16 @@ class InfoExtractor:
      track_number:   Number of the track within an album or a disc, as an integer.
      track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
                      as a unicode string.
      track_number:   Number of the track within an album or a disc, as an integer.
      track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
                      as a unicode string.
-    artist:         Artist(s) of the track.
-    genre:          Genre(s) of the track.
+    artists:        List of artists of the track.
+    composers:      List of composers of the piece.
+    genres:         List of genres of the track.
      album:          Title of the album the track belongs to.
      album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
      album:          Title of the album the track belongs to.
      album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
-    album_artist:   List of all artists appeared on the album (e.g.
-                    "Ash Borer / Fell Voices" or "Various Artists", useful for splits
-                    and compilations).
+    album_artists:  List of all artists that appeared on the album.
+                    E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
+                    Useful for splits and compilations.
      disc_number:    Number of the disc or other physical medium the track belongs to,
                      as an integer.
      disc_number:    Number of the disc or other physical medium the track belongs to,
                      as an integer.
-    release_year:   Year (YYYY) when the album was released.
-    composer:       Composer of the piece
  
      The following fields should only be set for clips that should be cut from the original video:
  
  
      The following fields should only be set for clips that should be cut from the original video:
  
@@ -434,6 +452,18 @@ class InfoExtractor:
      rows:           Number of rows in each storyboard fragment, as an integer
      columns:        Number of columns in each storyboard fragment, as an integer
  
      rows:           Number of rows in each storyboard fragment, as an integer
      columns:        Number of columns in each storyboard fragment, as an integer
  
+    The following fields are deprecated and should not be set by new code:
+    composer:       Use "composers" instead.
+                    Composer(s) of the piece, comma-separated.
+    artist:         Use "artists" instead.
+                    Artist(s) of the track, comma-separated.
+    genre:          Use "genres" instead.
+                    Genre(s) of the track, comma-separated.
+    album_artist:   Use "album_artists" instead.
+                    All artists that appeared on the album, comma-separated.
+    creator:        Use "creators" instead.
+                    The creator of the video.
+
      Unless mentioned otherwise, the fields should be Unicode strings.
  
      Unless mentioned otherwise, None is equivalent to absence of information.
      Unless mentioned otherwise, the fields should be Unicode strings.
  
      Unless mentioned otherwise, None is equivalent to absence of information.
@@ -724,11 +754,11 @@ def extract(self, url):
          except UnsupportedError:
              raise
          except ExtractorError as e:
          except UnsupportedError:
              raise
          except ExtractorError as e:
-            e.video_id = e.video_id or self.get_temp_id(url),
-            e.ie = e.ie or self.IE_NAME,
+            e.video_id = e.video_id or self.get_temp_id(url)
+            e.ie = e.ie or self.IE_NAME
              e.traceback = e.traceback or sys.exc_info()[2]
              raise
              e.traceback = e.traceback or sys.exc_info()[2]
              raise
-        except http.client.IncompleteRead as e:
+        except IncompleteRead as e:
              raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
          except (KeyError, StopIteration) as e:
              raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
              raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
          except (KeyError, StopIteration) as e:
              raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@@ -743,8 +773,8 @@ def __maybe_fake_ip_and_retry(self, countries):
              self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
              if self._x_forwarded_for_ip:
                  self.report_warning(
              self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
              if self._x_forwarded_for_ip:
                  self.report_warning(
-                    'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
-                    % (self._x_forwarded_for_ip, country_code.upper()))
+                    'Video is geo restricted. Retrying extraction with fake IP '
+                    f'{self._x_forwarded_for_ip} ({country_code.upper()}) as X-Forwarded-For.')
                  return True
          return False
  
                  return True
          return False
  
@@ -787,22 +817,28 @@ def IE_NAME(cls):
  
      @staticmethod
      def __can_accept_status_code(err, expected_status):
  
      @staticmethod
      def __can_accept_status_code(err, expected_status):
-        assert isinstance(err, urllib.error.HTTPError)
+        assert isinstance(err, HTTPError)
          if expected_status is None:
              return False
          elif callable(expected_status):
          if expected_status is None:
              return False
          elif callable(expected_status):
-            return expected_status(err.code) is True
+            return expected_status(err.status) is True
          else:
          else:
-            return err.code in variadic(expected_status)
+            return err.status in variadic(expected_status)
  
  
-    def _create_request(self, url_or_request, data=None, headers=None, query=None):
+    def _create_request(self, url_or_request, data=None, headers=None, query=None, extensions=None):
          if isinstance(url_or_request, urllib.request.Request):
          if isinstance(url_or_request, urllib.request.Request):
-            return update_Request(url_or_request, data=data, headers=headers, query=query)
-        if query:
-            url_or_request = update_url_query(url_or_request, query)
-        return sanitized_Request(url_or_request, data, headers or {})
+            self._downloader.deprecation_warning(
+                'Passing a urllib.request.Request to _create_request() is deprecated. '
+                'Use yt_dlp.networking.common.Request instead.')
+            url_or_request = urllib_req_to_req(url_or_request)
+        elif not isinstance(url_or_request, Request):
+            url_or_request = Request(url_or_request)
  
  
-    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
+        url_or_request.update(data=data, headers=headers, query=query, extensions=extensions)
+        return url_or_request
+
+    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None,
+                         headers=None, query=None, expected_status=None, impersonate=None, require_impersonation=False):
          """
          Return the response handle.
  
          """
          Return the response handle.
  
@@ -811,7 +847,7 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
          if not self._downloader._first_webpage_request:
              sleep_interval = self.get_param('sleep_interval_requests') or 0
              if sleep_interval > 0:
          if not self._downloader._first_webpage_request:
              sleep_interval = self.get_param('sleep_interval_requests') or 0
              if sleep_interval > 0:
-                self.to_screen('Sleeping %s seconds ...' % sleep_interval)
+                self.to_screen(f'Sleeping {sleep_interval} seconds ...')
                  time.sleep(sleep_interval)
          else:
              self._downloader._first_webpage_request = False
                  time.sleep(sleep_interval)
          else:
              self._downloader._first_webpage_request = False
@@ -833,24 +869,42 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
              headers = (headers or {}).copy()
              headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
  
              headers = (headers or {}).copy()
              headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
  
+        extensions = {}
+
+        if impersonate in (True, ''):
+            impersonate = ImpersonateTarget()
+        requested_targets = [
+            t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
+            for t in variadic(impersonate)
+        ] if impersonate else []
+
+        available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
+        if available_target:
+            extensions['impersonate'] = available_target
+        elif requested_targets:
+            message = 'The extractor is attempting impersonation, but '
+            message += (
+                'no impersonate target is available' if not str(impersonate)
+                else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
+            info_msg = ('see  https://github.com/yt-dlp/yt-dlp#impersonation  '
+                        'for information on installing the required dependencies')
+            if require_impersonation:
+                raise ExtractorError(f'{message}; {info_msg}', expected=True)
+            self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
+
          try:
          try:
-            return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
+            return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))
          except network_exceptions as err:
          except network_exceptions as err:
-            if isinstance(err, urllib.error.HTTPError):
+            if isinstance(err, HTTPError):
                  if self.__can_accept_status_code(err, expected_status):
                  if self.__can_accept_status_code(err, expected_status):
-                    # Retain reference to error to prevent file object from
-                    # being closed before it can be read. Works around the
-                    # effects of <https://bugs.python.org/issue15002>
-                    # introduced in Python 3.4.1.
-                    err.fp._error = err
-                    return err.fp
+                    return err.response
  
              if errnote is False:
                  return False
              if errnote is None:
                  errnote = 'Unable to download webpage'
  
  
              if errnote is False:
                  return False
              if errnote is None:
                  errnote = 'Unable to download webpage'
  
-            errmsg = f'{errnote}: {error_to_compat_str(err)}'
+            errmsg = f'{errnote}: {err}'
              if fatal:
                  raise ExtractorError(errmsg, cause=err)
              else:
              if fatal:
                  raise ExtractorError(errmsg, cause=err)
              else:
@@ -858,13 +912,14 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
                  return False
  
      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
                  return False
  
      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
-                                 encoding=None, data=None, headers={}, query={}, expected_status=None):
+                                 encoding=None, data=None, headers={}, query={}, expected_status=None,
+                                 impersonate=None, require_impersonation=False):
          """
          Return a tuple (page content as string, URL handle).
  
          Arguments:
          url_or_request -- plain text URL as a string or
          """
          Return a tuple (page content as string, URL handle).
  
          Arguments:
          url_or_request -- plain text URL as a string or
-            a urllib.request.Request object
+            a yt_dlp.networking.Request object
          video_id -- Video/playlist/item identifier (string)
  
          Keyword arguments:
          video_id -- Video/playlist/item identifier (string)
  
          Keyword arguments:
@@ -889,17 +944,27 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
                    returning True if it should be accepted
              Note that this argument does not affect success status codes (2xx)
              which are always accepted.
                    returning True if it should be accepted
              Note that this argument does not affect success status codes (2xx)
              which are always accepted.
+        impersonate -- the impersonate target. Can be any of the following entities:
+                - an instance of yt_dlp.networking.impersonate.ImpersonateTarget
+                - a string in the format of CLIENT[:OS]
+                - a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
+                - a boolean value; True means any impersonate target is sufficient
+        require_impersonation -- flag to toggle whether the request should raise an error
+            if impersonation is not possible (bool, default: False)
          """
  
          # Strip hashes from the URL (#1038)
          if isinstance(url_or_request, str):
              url_or_request = url_or_request.partition('#')[0]
  
          """
  
          # Strip hashes from the URL (#1038)
          if isinstance(url_or_request, str):
              url_or_request = url_or_request.partition('#')[0]
  
-        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data,
+                                     headers=headers, query=query, expected_status=expected_status,
+                                     impersonate=impersonate, require_impersonation=require_impersonation)
          if urlh is False:
              assert not fatal
              return False
          if urlh is False:
              assert not fatal
              return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
+                                             encoding=encoding, data=data)
          return (content, urlh)
  
      @staticmethod
          return (content, urlh)
  
      @staticmethod
@@ -928,7 +993,7 @@ def __check_blocked(self, content):
                  r'<iframe src="([^"]+)"', content,
                  'Websense information URL', default=None)
              if blocked_iframe:
                  r'<iframe src="([^"]+)"', content,
                  'Websense information URL', default=None)
              if blocked_iframe:
-                msg += ' Visit %s for more details' % blocked_iframe
+                msg += f' Visit {blocked_iframe} for more details'
              raise ExtractorError(msg, expected=True)
          if '<title>The URL you requested has been blocked</title>' in first_block:
              msg = (
              raise ExtractorError(msg, expected=True)
          if '<title>The URL you requested has been blocked</title>' in first_block:
              msg = (
@@ -938,7 +1003,7 @@ def __check_blocked(self, content):
                  r'</h1><p>(.*?)</p>',
                  content, 'block message', default=None)
              if block_msg:
                  r'</h1><p>(.*?)</p>',
                  content, 'block message', default=None)
              if block_msg:
-                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+                msg += ' (Message: "{}")'.format(block_msg.replace('\n', ' '))
              raise ExtractorError(msg, expected=True)
          if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
                  and 'blocklist.rkn.gov.ru' in content):
              raise ExtractorError(msg, expected=True)
          if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
                  and 'blocklist.rkn.gov.ru' in content):
@@ -947,11 +1012,13 @@ def __check_blocked(self, content):
                  'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
                  expected=True)
  
                  'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
                  expected=True)
  
-    def _request_dump_filename(self, url, video_id):
-        basen = f'{video_id}_{url}'
+    def _request_dump_filename(self, url, video_id, data=None):
+        if data is not None:
+            data = hashlib.md5(data).hexdigest()
+        basen = join_nonempty(video_id, data, url, delim='_')
          trim_length = self.get_param('trim_file_name') or 240
          if len(basen) > trim_length:
          trim_length = self.get_param('trim_file_name') or 240
          if len(basen) > trim_length:
-            h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+            h = '___' + hashlib.md5(basen.encode()).hexdigest()
              basen = basen[:trim_length - len(h)] + h
          filename = sanitize_filename(f'{basen}.dump', restricted=True)
          # Working around MAX_PATH limitation on Windows (see
              basen = basen[:trim_length - len(h)] + h
          filename = sanitize_filename(f'{basen}.dump', restricted=True)
          # Working around MAX_PATH limitation on Windows (see
@@ -970,16 +1037,19 @@ def __decode_webpage(self, webpage_bytes, encoding, headers):
          except LookupError:
              return webpage_bytes.decode('utf-8', 'replace')
  
          except LookupError:
              return webpage_bytes.decode('utf-8', 'replace')
  
-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
+                              prefix=None, encoding=None, data=None):
          webpage_bytes = urlh.read()
          if prefix is not None:
              webpage_bytes = prefix + webpage_bytes
          if self.get_param('dump_intermediate_pages', False):
          webpage_bytes = urlh.read()
          if prefix is not None:
              webpage_bytes = prefix + webpage_bytes
          if self.get_param('dump_intermediate_pages', False):
-            self.to_screen('Dumping request to ' + urlh.geturl())
+            self.to_screen('Dumping request to ' + urlh.url)
              dump = base64.b64encode(webpage_bytes).decode('ascii')
              self._downloader.to_screen(dump)
          if self.get_param('write_pages'):
              dump = base64.b64encode(webpage_bytes).decode('ascii')
              self._downloader.to_screen(dump)
          if self.get_param('write_pages'):
-            filename = self._request_dump_filename(urlh.geturl(), video_id)
+            if isinstance(url_or_request, Request):
+                data = self._create_request(url_or_request, data).data
+            filename = self._request_dump_filename(urlh.url, video_id, data)
              self.to_screen(f'Saving request to {filename}')
              with open(filename, 'wb') as outf:
                  outf.write(webpage_bytes)
              self.to_screen(f'Saving request to {filename}')
              with open(filename, 'wb') as outf:
                  outf.write(webpage_bytes)
@@ -999,7 +1069,7 @@ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True, er
          if transform_source:
              xml_string = transform_source(xml_string)
          try:
          if transform_source:
              xml_string = transform_source(xml_string)
          try:
-            return compat_etree_fromstring(xml_string.encode('utf-8'))
+            return compat_etree_fromstring(xml_string.encode())
          except xml.etree.ElementTree.ParseError as ve:
              self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve)
  
          except xml.etree.ElementTree.ParseError as ve:
              self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve)
  
@@ -1024,20 +1094,23 @@ def parse(ie, content, *args, errnote=errnote, **kwargs):
              return getattr(ie, parser)(content, *args, **kwargs)
  
          def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
              return getattr(ie, parser)(content, *args, **kwargs)
  
          def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
-                            fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+                            fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+                            impersonate=None, require_impersonation=False):
              res = self._download_webpage_handle(
                  url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
              res = self._download_webpage_handle(
                  url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
-                data=data, headers=headers, query=query, expected_status=expected_status)
+                data=data, headers=headers, query=query, expected_status=expected_status,
+                impersonate=impersonate, require_impersonation=require_impersonation)
              if res is False:
                  return res
              content, urlh = res
              return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
  
          def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
              if res is False:
                  return res
              content, urlh = res
              return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
  
          def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
-                             fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+                             fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+                             impersonate=None, require_impersonation=False):
              if self.get_param('load_pages'):
                  url_or_request = self._create_request(url_or_request, data, headers, query)
              if self.get_param('load_pages'):
                  url_or_request = self._create_request(url_or_request, data, headers, query)
-                filename = self._request_dump_filename(url_or_request.full_url, video_id)
+                filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
                  self.to_screen(f'Loading request from {filename}')
                  try:
                      with open(filename, 'rb') as dumpf:
                  self.to_screen(f'Loading request from {filename}')
                  try:
                      with open(filename, 'rb') as dumpf:
@@ -1057,6 +1130,8 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
                  'headers': headers,
                  'query': query,
                  'expected_status': expected_status,
                  'headers': headers,
                  'query': query,
                  'expected_status': expected_status,
+                'impersonate': impersonate,
+                'require_impersonation': require_impersonation,
              }
              if parser is None:
                  kwargs.pop('transform_source')
              }
              if parser is None:
                  kwargs.pop('transform_source')
@@ -1111,7 +1186,7 @@ def _download_webpage(
          while True:
              try:
                  return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
          while True:
              try:
                  return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
-            except http.client.IncompleteRead as e:
+            except IncompleteRead as e:
                  try_count += 1
                  if try_count >= tries:
                      raise e
                  try_count += 1
                  if try_count >= tries:
                      raise e
@@ -1145,11 +1220,11 @@ def report_drm(self, video_id, partial=NO_DEFAULT):
  
      def report_extraction(self, id_or_name):
          """Report information extraction."""
  
      def report_extraction(self, id_or_name):
          """Report information extraction."""
-        self.to_screen('%s: Extracting information' % id_or_name)
+        self.to_screen(f'{id_or_name}: Extracting information')
  
      def report_download_webpage(self, video_id):
          """Report webpage download."""
  
      def report_download_webpage(self, video_id):
          """Report webpage download."""
-        self.to_screen('%s: Downloading webpage' % video_id)
+        self.to_screen(f'{video_id}: Downloading webpage')
  
      def report_age_confirmation(self):
          """Report attempt to confirm age."""
  
      def report_age_confirmation(self):
          """Report attempt to confirm age."""
@@ -1255,9 +1330,9 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
          elif default is not NO_DEFAULT:
              return default
          elif fatal:
          elif default is not NO_DEFAULT:
              return default
          elif fatal:
-            raise RegexNotFoundError('Unable to extract %s' % _name)
+            raise RegexNotFoundError(f'Unable to extract {_name}')
          else:
          else:
-            self.report_warning('unable to extract %s' % _name + bug_reports_message())
+            self.report_warning(f'unable to extract {_name}' + bug_reports_message())
              return None
  
      def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
              return None
  
      def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
@@ -1317,7 +1392,10 @@ def _get_netrc_login_info(self, netrc_machine=None):
          else:
              return None, None
          if not info:
          else:
              return None, None
          if not info:
-            raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}')
+            self.to_screen(f'No authenticators for {netrc_machine}')
+            return None, None
+
+        self.write_debug(f'Using netrc for {netrc_machine} authentication')
          return info[0], info[2]
  
      def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
          return info[0], info[2]
  
      def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
@@ -1353,14 +1431,14 @@ def _get_tfa_info(self, note='two-factor verification code'):
          if tfa is not None:
              return tfa
  
          if tfa is not None:
              return tfa
  
-        return getpass.getpass('Type %s and press [Return]: ' % note)
+        return getpass.getpass(f'Type {note} and press [Return]: ')
  
      # Helper functions for extracting OpenGraph info
      @staticmethod
      def _og_regexes(prop):
          content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
  
      # Helper functions for extracting OpenGraph info
      @staticmethod
      def _og_regexes(prop):
          content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
-        property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
-                       % {'prop': re.escape(prop), 'sep': '(?:&#x3A;|[:-])'})
+        property_re = r'(?:name|property)=(?:\'og{sep}{prop}\'|"og{sep}{prop}"|\s*og{sep}{prop}\b)'.format(
+            prop=re.escape(prop), sep='(?:&#x3A;|[:-])')
          template = r'<meta[^>]+?%s[^>]+?%s'
          return [
              template % (property_re, content_re),
          template = r'<meta[^>]+?%s[^>]+?%s'
          return [
              template % (property_re, content_re),
@@ -1369,14 +1447,14 @@ def _og_regexes(prop):
  
      @staticmethod
      def _meta_regex(prop):
  
      @staticmethod
      def _meta_regex(prop):
-        return r'''(?isx)<meta
-                    (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
-                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+        return rf'''(?isx)<meta
+                    (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?){re.escape(prop)}\1)
+                    [^>]+?content=(["\'])(?P<content>.*?)\2'''
  
      def _og_search_property(self, prop, html, name=None, **kargs):
          prop = variadic(prop)
          if name is None:
  
      def _og_search_property(self, prop, html, name=None, **kargs):
          prop = variadic(prop)
          if name is None:
-            name = 'OpenGraph %s' % prop[0]
+            name = f'OpenGraph {prop[0]}'
          og_regexes = []
          for p in prop:
              og_regexes.extend(self._og_regexes(p))
          og_regexes = []
          for p in prop:
              og_regexes.extend(self._og_regexes(p))
@@ -1499,7 +1577,7 @@ def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, def
          elif fatal:
              raise RegexNotFoundError('Unable to extract JSON-LD')
          else:
          elif fatal:
              raise RegexNotFoundError('Unable to extract JSON-LD')
          else:
-            self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+            self.report_warning(f'unable to extract JSON-LD {bug_reports_message()}')
              return {}
  
      def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
              return {}
  
      def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
@@ -1521,8 +1599,8 @@ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
          }
  
          def is_type(e, *expected_types):
          }
  
          def is_type(e, *expected_types):
-            type = variadic(traverse_obj(e, '@type'))
-            return any(x in type for x in expected_types)
+            type_ = variadic(traverse_obj(e, '@type'))
+            return any(x in type_ for x in expected_types)
  
          def extract_interaction_type(e):
              interaction_type = e.get('interactionType')
  
          def extract_interaction_type(e):
              interaction_type = e.get('interactionType')
@@ -1551,7 +1629,7 @@ def extract_interaction_statistic(e):
                  count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
                  if not count_kind:
                      continue
                  count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
                  if not count_kind:
                      continue
-                count_key = '%s_count' % count_kind
+                count_key = f'{count_kind}_count'
                  if info.get(count_key) is not None:
                      continue
                  info[count_key] = interaction_count
                  if info.get(count_key) is not None:
                      continue
                  info[count_key] = interaction_count
@@ -1563,7 +1641,7 @@ def extract_chapter_information(e):
                  'end_time': part.get('endOffset'),
              } for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
              for idx, (last_c, current_c, next_c) in enumerate(zip(
                  'end_time': part.get('endOffset'),
              } for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
              for idx, (last_c, current_c, next_c) in enumerate(zip(
-                    [{'end_time': 0}] + chapters, chapters, chapters[1:])):
+                    [{'end_time': 0}, *chapters], chapters, chapters[1:])):
                  current_c['end_time'] = current_c['end_time'] or next_c['start_time']
                  current_c['start_time'] = current_c['start_time'] or last_c['end_time']
                  if None in current_c.values():
                  current_c['end_time'] = current_c['end_time'] or next_c['start_time']
                  current_c['start_time'] = current_c['start_time'] or last_c['end_time']
                  if None in current_c.values():
@@ -1672,17 +1750,21 @@ def traverse_json_ld(json_ld, at_top_level=True):
          traverse_json_ld(json_ld)
          return filter_dict(info)
  
          traverse_json_ld(json_ld)
          return filter_dict(info)
  
-    def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', fatal=fatal, **kw),
-            video_id, transform_source=transform_source, fatal=fatal)
+    def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+        if default == '{}':
+            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+            default = {}
+        if default is not NO_DEFAULT:
+            fatal = False
+
+        return self._search_json(
+            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
  
      def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
          """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
          rectx = re.escape(context_name)
  
      def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
          """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
          rectx = re.escape(context_name)
-        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
+        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){.*?\breturn\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
          js, arg_keys, arg_vals = self._search_regex(
              (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
              webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
          js, arg_keys, arg_vals = self._search_regex(
              (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
              webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
@@ -1700,9 +1782,9 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
      def _hidden_inputs(html):
          html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
          hidden_inputs = {}
      def _hidden_inputs(html):
          html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
          hidden_inputs = {}
-        for input in re.findall(r'(?i)(<input[^>]+>)', html):
-            attrs = extract_attributes(input)
-            if not input:
+        for input_el in re.findall(r'(?i)(<input[^>]+>)', html):
+            attrs = extract_attributes(input_el)
+            if not input_el:
                  continue
              if attrs.get('type') not in ('hidden', 'submit'):
                  continue
                  continue
              if attrs.get('type') not in ('hidden', 'submit'):
                  continue
@@ -1714,8 +1796,8 @@ def _hidden_inputs(html):
  
      def _form_hidden_inputs(self, form_id, html):
          form = self._search_regex(
  
      def _form_hidden_inputs(self, form_id, html):
          form = self._search_regex(
-            r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
-            html, '%s form' % form_id, group='form')
+            rf'(?is)<form[^>]+?id=(["\']){form_id}\1[^>]*>(?P<form>.+?)</form>',
+            html, f'{form_id} form', group='form')
          return self._hidden_inputs(form)
  
      @classproperty(cache=True)
          return self._hidden_inputs(form)
  
      @classproperty(cache=True)
@@ -1745,7 +1827,7 @@ def _check_formats(self, formats, video_id):
              formats[:] = filter(
                  lambda f: self._is_valid_url(
                      f['url'], video_id,
              formats[:] = filter(
                  lambda f: self._is_valid_url(
                      f['url'], video_id,
-                    item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+                    item='{} video format'.format(f.get('format_id')) if f.get('format_id') else 'video'),
                  formats)
  
      @staticmethod
                  formats)
  
      @staticmethod
@@ -1761,15 +1843,14 @@ def _remove_duplicate_formats(formats):
      def _is_valid_url(self, url, video_id, item='video', headers={}):
          url = self._proto_relative_url(url, scheme='http:')
          # For now assume non HTTP(S) URLs always valid
      def _is_valid_url(self, url, video_id, item='video', headers={}):
          url = self._proto_relative_url(url, scheme='http:')
          # For now assume non HTTP(S) URLs always valid
-        if not (url.startswith('http://') or url.startswith('https://')):
+        if not url.startswith(('http://', 'https://')):
              return True
          try:
              return True
          try:
-            self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
+            self._request_webpage(url, video_id, f'Checking {item} URL', headers=headers)
              return True
          except ExtractorError as e:
              self.to_screen(
              return True
          except ExtractorError as e:
              self.to_screen(
-                '%s: %s URL is invalid, skipping: %s'
-                % (video_id, item, error_to_compat_str(e.cause)))
+                f'{video_id}: {item} URL is invalid, skipping: {e.cause!s}')
              return False
  
      def http_scheme(self):
              return False
  
      def http_scheme(self):
@@ -1808,7 +1889,7 @@ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=
              return []
  
          manifest, urlh = res
              return []
  
          manifest, urlh = res
-        manifest_url = urlh.geturl()
+        manifest_url = urlh.url
  
          return self._parse_f4m_formats(
              manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
  
          return self._parse_f4m_formats(
              manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
@@ -1823,8 +1904,8 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
          # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
          akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
          if akamai_pv is not None and ';' in akamai_pv.text:
          # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
          akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
          if akamai_pv is not None and ';' in akamai_pv.text:
-            playerVerificationChallenge = akamai_pv.text.split(';')[0]
-            if playerVerificationChallenge.strip() != '':
+            player_verification_challenge = akamai_pv.text.split(';')[0]
+            if player_verification_challenge.strip() != '':
                  return []
  
          formats = []
                  return []
  
          formats = []
@@ -1870,7 +1951,7 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
                  if not media_url:
                      continue
                  manifest_url = (
                  if not media_url:
                      continue
                  manifest_url = (
-                    media_url if media_url.startswith('http://') or media_url.startswith('https://')
+                    media_url if media_url.startswith(('http://', 'https://'))
                      else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
                  # If media_url is itself a f4m manifest do the recursive extraction
                  # since bitrates in parent manifest (this one) and media_url manifest
                      else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
                  # If media_url is itself a f4m manifest do the recursive extraction
                  # since bitrates in parent manifest (this one) and media_url manifest
@@ -1931,7 +2012,7 @@ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m
      def _report_ignoring_subs(self, name):
          self.report_warning(bug_reports_message(
              f'Ignoring subtitle tracks found in the {name} manifest; '
      def _report_ignoring_subs(self, name):
          self.report_warning(bug_reports_message(
              f'Ignoring subtitle tracks found in the {name} manifest; '
-            'if any subtitle tracks are missing,'
+            'if any subtitle tracks are missing,',
          ), only_once=True)
  
      def _extract_m3u8_formats(self, *args, **kwargs):
          ), only_once=True)
  
      def _extract_m3u8_formats(self, *args, **kwargs):
@@ -1967,7 +2048,7 @@ def _extract_m3u8_formats_and_subtitles(
              return [], {}
  
          m3u8_doc, urlh = res
              return [], {}
  
          m3u8_doc, urlh = res
-        m3u8_url = urlh.geturl()
+        m3u8_url = urlh.url
  
          return self._parse_m3u8_formats_and_subtitles(
              m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
  
          return self._parse_m3u8_formats_and_subtitles(
              m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
@@ -2022,7 +2103,7 @@ def _extract_m3u8_playlist_indices(*args, **kwargs):
              formats = [{
                  'format_id': join_nonempty(m3u8_id, idx),
                  'format_index': idx,
              formats = [{
                  'format_id': join_nonempty(m3u8_id, idx),
                  'format_index': idx,
-                'url': m3u8_url or encode_data_uri(m3u8_doc.encode('utf-8'), 'application/x-mpegurl'),
+                'url': m3u8_url or encode_data_uri(m3u8_doc.encode(), 'application/x-mpegurl'),
                  'ext': ext,
                  'protocol': entry_protocol,
                  'preference': preference,
                  'ext': ext,
                  'protocol': entry_protocol,
                  'preference': preference,
@@ -2141,6 +2222,11 @@ def build_stream_name():
                          'quality': quality,
                          'has_drm': has_drm,
                      }
                          'quality': quality,
                          'has_drm': has_drm,
                      }
+
+                    # YouTube-specific
+                    if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
+                        f['language'] = yt_audio_content_id.split('.')[0]
+
                      resolution = last_stream_inf.get('RESOLUTION')
                      if resolution:
                          mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
                      resolution = last_stream_inf.get('RESOLUTION')
                      if resolution:
                          mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
@@ -2220,7 +2306,9 @@ def _extract_mpd_vod_duration(
              mpd_url, video_id,
              note='Downloading MPD VOD manifest' if note is None else note,
              errnote='Failed to download VOD manifest' if errnote is None else errnote,
              mpd_url, video_id,
              note='Downloading MPD VOD manifest' if note is None else note,
              errnote='Failed to download VOD manifest' if errnote is None else errnote,
-            fatal=False, data=data, headers=headers, query=query) or {}
+            fatal=False, data=data, headers=headers, query=query)
+        if not isinstance(mpd_doc, xml.etree.ElementTree.Element):
+            return None
          return int_or_none(parse_duration(mpd_doc.get('mediaPresentationDuration')))
  
      @staticmethod
          return int_or_none(parse_duration(mpd_doc.get('mediaPresentationDuration')))
  
      @staticmethod
@@ -2232,7 +2320,7 @@ def _xpath_ns(path, namespace=None):
              if not c or c == '.':
                  out.append(c)
              else:
              if not c or c == '.':
                  out.append(c)
              else:
-                out.append('{%s}%s' % (namespace, c))
+                out.append(f'{{{namespace}}}{c}')
          return '/'.join(out)
  
      def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
          return '/'.join(out)
  
      def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
@@ -2243,18 +2331,10 @@ def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4
          if res is False:
              assert not fatal
              return [], {}
          if res is False:
              assert not fatal
              return [], {}
-
          smil, urlh = res
          smil, urlh = res
-        smil_url = urlh.geturl()
-
-        namespace = self._parse_smil_namespace(smil)
-
-        fmts = self._parse_smil_formats(
-            smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
-        subs = self._parse_smil_subtitles(
-            smil, namespace=namespace)
  
  
-        return fmts, subs
+        return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
+                                                      namespace=self._parse_smil_namespace(smil))
  
      def _extract_smil_formats(self, *args, **kwargs):
          fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
  
      def _extract_smil_formats(self, *args, **kwargs):
          fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
@@ -2268,7 +2348,7 @@ def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
              return {}
  
          smil, urlh = res
              return {}
  
          smil, urlh = res
-        smil_url = urlh.geturl()
+        smil_url = urlh.url
  
          return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
  
  
          return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
  
@@ -2280,9 +2360,8 @@ def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
      def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
          namespace = self._parse_smil_namespace(smil)
  
      def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
          namespace = self._parse_smil_namespace(smil)
  
-        formats = self._parse_smil_formats(
+        formats, subtitles = self._parse_smil_formats_and_subtitles(
              smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
              smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
-        subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
  
          video_id = os.path.splitext(url_basename(smil_url))[0]
          title = None
  
          video_id = os.path.splitext(url_basename(smil_url))[0]
          title = None
@@ -2321,7 +2400,14 @@ def _parse_smil_namespace(self, smil):
          return self._search_regex(
              r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
  
          return self._search_regex(
              r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
  
-    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+    def _parse_smil_formats(self, *args, **kwargs):
+        fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self._report_ignoring_subs('SMIL')
+        return fmts
+
+    def _parse_smil_formats_and_subtitles(
+            self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
          base = smil_url
          for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
              b = meta.get('base') or meta.get('httpBase')
          base = smil_url
          for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
              b = meta.get('base') or meta.get('httpBase')
@@ -2329,14 +2415,16 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                  base = b
                  break
  
                  base = b
                  break
  
-        formats = []
+        formats, subtitles = [], {}
          rtmp_count = 0
          http_count = 0
          m3u8_count = 0
          imgs_count = 0
  
          srcs = set()
          rtmp_count = 0
          http_count = 0
          m3u8_count = 0
          imgs_count = 0
  
          srcs = set()
-        media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
+        media = itertools.chain.from_iterable(
+            smil.findall(self._xpath_ns(arg, namespace))
+            for arg in ['.//video', './/audio', './/media'])
          for medium in media:
              src = medium.get('src')
              if not src or src in srcs:
          for medium in media:
              src = medium.get('src')
              if not src or src in srcs:
@@ -2373,12 +2461,13 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                      })
                  continue
  
                      })
                  continue
  
-            src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
+            src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src)
              src_url = src_url.strip()
  
              if proto == 'm3u8' or src_ext == 'm3u8':
              src_url = src_url.strip()
  
              if proto == 'm3u8' or src_ext == 'm3u8':
-                m3u8_formats = self._extract_m3u8_formats(
+                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                      src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
                      src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+                self._merge_subtitles(m3u8_subs, target=subtitles)
                  if len(m3u8_formats) == 1:
                      m3u8_count += 1
                      m3u8_formats[0].update({
                  if len(m3u8_formats) == 1:
                      m3u8_count += 1
                      m3u8_formats[0].update({
@@ -2399,11 +2488,15 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                  f4m_url += urllib.parse.urlencode(f4m_params)
                  formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
              elif src_ext == 'mpd':
                  f4m_url += urllib.parse.urlencode(f4m_params)
                  formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
              elif src_ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    src_url, video_id, mpd_id='dash', fatal=False))
+                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
+                    src_url, video_id, mpd_id='dash', fatal=False)
+                formats.extend(mpd_formats)
+                self._merge_subtitles(mpd_subs, target=subtitles)
              elif re.search(r'\.ism/[Mm]anifest', src_url):
              elif re.search(r'\.ism/[Mm]anifest', src_url):
-                formats.extend(self._extract_ism_formats(
-                    src_url, video_id, ism_id='mss', fatal=False))
+                ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
+                    src_url, video_id, ism_id='mss', fatal=False)
+                formats.extend(ism_formats)
+                self._merge_subtitles(ism_subs, target=subtitles)
              elif src_url.startswith('http') and self._is_valid_url(src, video_id):
                  http_count += 1
                  formats.append({
              elif src_url.startswith('http') and self._is_valid_url(src, video_id):
                  http_count += 1
                  formats.append({
@@ -2424,7 +2517,7 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
  
              imgs_count += 1
              formats.append({
  
              imgs_count += 1
              formats.append({
-                'format_id': 'imagestream-%d' % (imgs_count),
+                'format_id': f'imagestream-{imgs_count}',
                  'url': src,
                  'ext': mimetype2ext(medium.get('type')),
                  'acodec': 'none',
                  'url': src,
                  'ext': mimetype2ext(medium.get('type')),
                  'acodec': 'none',
@@ -2434,12 +2527,15 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                  'format_note': 'SMIL storyboards',
              })
  
                  'format_note': 'SMIL storyboards',
              })
  
-        return formats
+        smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
+        self._merge_subtitles(smil_subs, target=subtitles)
+
+        return formats, subtitles
  
      def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
          urls = []
          subtitles = {}
  
      def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
          urls = []
          subtitles = {}
-        for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
+        for textstream in smil.findall(self._xpath_ns('.//textstream', namespace)):
              src = textstream.get('src')
              if not src or src in urls:
                  continue
              src = textstream.get('src')
              if not src or src in urls:
                  continue
@@ -2460,7 +2556,7 @@ def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
              return []
  
          xspf, urlh = res
              return []
  
          xspf, urlh = res
-        xspf_url = urlh.geturl()
+        xspf_url = urlh.url
  
          return self._parse_xspf(
              xspf, playlist_id, xspf_url=xspf_url,
  
          return self._parse_xspf(
              xspf, playlist_id, xspf_url=xspf_url,
@@ -2512,7 +2608,11 @@ def _extract_mpd_formats(self, *args, **kwargs):
              self._report_ignoring_subs('DASH')
          return fmts
  
              self._report_ignoring_subs('DASH')
          return fmts
  
-    def _extract_mpd_formats_and_subtitles(
+    def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._extract_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _extract_mpd_periods(
              self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
              fatal=True, data=None, headers={}, query={}):
  
              self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
              fatal=True, data=None, headers={}, query={}):
  
@@ -2525,17 +2625,16 @@ def _extract_mpd_formats_and_subtitles(
              errnote='Failed to download MPD manifest' if errnote is None else errnote,
              fatal=fatal, data=data, headers=headers, query=query)
          if res is False:
              errnote='Failed to download MPD manifest' if errnote is None else errnote,
              fatal=fatal, data=data, headers=headers, query=query)
          if res is False:
-            return [], {}
+            return []
          mpd_doc, urlh = res
          if mpd_doc is None:
          mpd_doc, urlh = res
          if mpd_doc is None:
-            return [], {}
+            return []
  
          # We could have been redirected to a new url when we retrieved our mpd file.
  
          # We could have been redirected to a new url when we retrieved our mpd file.
-        mpd_url = urlh.geturl()
+        mpd_url = urlh.url
          mpd_base_url = base_url(mpd_url)
  
          mpd_base_url = base_url(mpd_url)
  
-        return self._parse_mpd_formats_and_subtitles(
-            mpd_doc, mpd_id, mpd_base_url, mpd_url)
+        return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
  
      def _parse_mpd_formats(self, *args, **kwargs):
          fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
  
      def _parse_mpd_formats(self, *args, **kwargs):
          fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
@@ -2543,8 +2642,39 @@ def _parse_mpd_formats(self, *args, **kwargs):
              self._report_ignoring_subs('DASH')
          return fmts
  
              self._report_ignoring_subs('DASH')
          return fmts
  
-    def _parse_mpd_formats_and_subtitles(
-            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._parse_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _merge_mpd_periods(self, periods):
+        """
+        Combine all formats and subtitles from an MPD manifest into a single list,
+        by concatenating streams with similar formats.
+        """
+        formats, subtitles = {}, {}
+        for period in periods:
+            for f in period['formats']:
+                assert 'is_dash_periods' not in f, 'format already processed'
+                f['is_dash_periods'] = True
+                format_key = tuple(v for k, v in f.items() if k not in (
+                    ('format_id', 'fragments', 'manifest_stream_number')))
+                if format_key not in formats:
+                    formats[format_key] = f
+                elif 'fragments' in f:
+                    formats[format_key].setdefault('fragments', []).extend(f['fragments'])
+
+            if subtitles and period['subtitles']:
+                self.report_warning(bug_reports_message(
+                    'Found subtitles in multiple periods in the DASH manifest; '
+                    'if part of the subtitles are missing,',
+                ), only_once=True)
+
+            for sub_lang, sub_info in period['subtitles'].items():
+                subtitles.setdefault(sub_lang, []).extend(sub_info)
+
+        return list(formats.values()), subtitles
+
+    def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
          """
          Parse formats from MPD manifest.
          References:
          """
          Parse formats from MPD manifest.
          References:
@@ -2623,9 +2753,13 @@ def extract_Initialization(source):
              return ms_info
  
          mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
              return ms_info
  
          mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats, subtitles = [], {}
          stream_numbers = collections.defaultdict(int)
          stream_numbers = collections.defaultdict(int)
-        for period in mpd_doc.findall(_add_ns('Period')):
+        for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
+            period_entry = {
+                'id': period.get('id', f'period-{period_idx}'),
+                'formats': [],
+                'subtitles': collections.defaultdict(list),
+            }
              period_duration = parse_duration(period.get('duration')) or mpd_duration
              period_ms_info = extract_multisegment_info(period, {
                  'start_number': 1,
              period_duration = parse_duration(period.get('duration')) or mpd_duration
              period_ms_info = extract_multisegment_info(period, {
                  'start_number': 1,
@@ -2658,7 +2792,7 @@ def extract_Initialization(source):
                          elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
                              content_type = 'text'
                          else:
                          elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
                              content_type = 'text'
                          else:
-                            self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+                            self.report_warning(f'Unknown MIME type {mime_type} in DASH manifest')
                              continue
  
                      base_url = ''
                              continue
  
                      base_url = ''
@@ -2696,10 +2830,10 @@ def extract_Initialization(source):
                              'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                              'fps': int_or_none(representation_attrib.get('frameRate')),
                              'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
                              'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                              'fps': int_or_none(representation_attrib.get('frameRate')),
                              'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
-                            'format_note': 'DASH %s' % content_type,
+                            'format_note': f'DASH {content_type}',
                              'filesize': filesize,
                              'container': mimetype2ext(mime_type) + '_dash',
                              'filesize': filesize,
                              'container': mimetype2ext(mime_type) + '_dash',
-                            **codecs
+                            **codecs,
                          }
                      elif content_type == 'text':
                          f = {
                          }
                      elif content_type == 'text':
                          f = {
@@ -2740,8 +2874,8 @@ def prepare_template(template_name, identifiers):
                                  t += c
                          # Next, $...$ templates are translated to their
                          # %(...) counterparts to be used with % operator
                                  t += c
                          # Next, $...$ templates are translated to their
                          # %(...) counterparts to be used with % operator
-                        t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
-                        t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+                        t = re.sub(r'\$({})\$'.format('|'.join(identifiers)), r'%(\1)d', t)
+                        t = re.sub(r'\$({})%([^$]+)\$'.format('|'.join(identifiers)), r'%(\1)\2', t)
                          t.replace('$$', '$')
                          return t
  
                          t.replace('$$', '$')
                          return t
  
@@ -2804,12 +2938,12 @@ def add_segment_url():
                                      'duration': float_or_none(segment_d, representation_ms_info['timescale']),
                                  })
  
                                      'duration': float_or_none(segment_d, representation_ms_info['timescale']),
                                  })
  
-                            for num, s in enumerate(representation_ms_info['s']):
+                            for s in representation_ms_info['s']:
                                  segment_time = s.get('t') or segment_time
                                  segment_d = s['d']
                                  add_segment_url()
                                  segment_number += 1
                                  segment_time = s.get('t') or segment_time
                                  segment_d = s['d']
                                  add_segment_url()
                                  segment_number += 1
-                                for r in range(s.get('r', 0)):
+                                for _ in range(s.get('r', 0)):
                                      segment_time += segment_d
                                      add_segment_url()
                                      segment_number += 1
                                      segment_time += segment_d
                                      add_segment_url()
                                      segment_number += 1
@@ -2823,7 +2957,7 @@ def add_segment_url():
                          timescale = representation_ms_info['timescale']
                          for s in representation_ms_info['s']:
                              duration = float_or_none(s['d'], timescale)
                          timescale = representation_ms_info['timescale']
                          for s in representation_ms_info['s']:
                              duration = float_or_none(s['d'], timescale)
-                            for r in range(s.get('r', 0) + 1):
+                            for _ in range(s.get('r', 0) + 1):
                                  segment_uri = representation_ms_info['segment_urls'][segment_index]
                                  fragments.append({
                                      location_key(segment_uri): segment_uri,
                                  segment_uri = representation_ms_info['segment_urls'][segment_index]
                                  fragments.append({
                                      location_key(segment_uri): segment_uri,
@@ -2875,11 +3009,10 @@ def add_segment_url():
                      if content_type in ('video', 'audio', 'image/jpeg'):
                          f['manifest_stream_number'] = stream_numbers[f['url']]
                          stream_numbers[f['url']] += 1
                      if content_type in ('video', 'audio', 'image/jpeg'):
                          f['manifest_stream_number'] = stream_numbers[f['url']]
                          stream_numbers[f['url']] += 1
-                        formats.append(f)
+                        period_entry['formats'].append(f)
                      elif content_type == 'text':
                      elif content_type == 'text':
-                        subtitles.setdefault(lang or 'und', []).append(f)
-
-        return formats, subtitles
+                        period_entry['subtitles'][lang or 'und'].append(f)
+            yield period_entry
  
      def _extract_ism_formats(self, *args, **kwargs):
          fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
  
      def _extract_ism_formats(self, *args, **kwargs):
          fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
@@ -2902,7 +3035,7 @@ def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, not
          if ism_doc is None:
              return [], {}
  
          if ism_doc is None:
              return [], {}
  
-        return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
+        return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
  
      def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
          """
  
      def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
          """
@@ -2932,7 +3065,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
                  fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
                  # TODO: add support for WVC1 and WMAP
                  if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'):
                  fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
                  # TODO: add support for WVC1 and WMAP
                  if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'):
-                    self.report_warning('%s is not a supported codec' % fourcc)
+                    self.report_warning(f'{fourcc} is not a supported codec')
                      continue
                  tbr = int(track.attrib['Bitrate']) // 1000
                  # [1] does not mention Width and Height attributes. However,
                      continue
                  tbr = int(track.attrib['Bitrate']) // 1000
                  # [1] does not mention Width and Height attributes. However,
@@ -2981,7 +3114,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
                              'fourcc': fourcc,
                              'language': stream_language,
                              'codec_private_data': track.get('CodecPrivateData'),
                              'fourcc': fourcc,
                              'language': stream_language,
                              'codec_private_data': track.get('CodecPrivateData'),
-                        }
+                        },
                      })
                  elif stream_type in ('video', 'audio'):
                      formats.append({
                      })
                  elif stream_type in ('video', 'audio'):
                      formats.append({
@@ -3063,13 +3196,13 @@ def _media_formats(src, cur_media_type, type_info=None):
          _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
          media_tags = [(media_tag, media_tag_name, media_type, '')
                        for media_tag, media_tag_name, media_type
          _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
          media_tags = [(media_tag, media_tag_name, media_type, '')
                        for media_tag, media_tag_name, media_type
-                      in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
+                      in re.findall(rf'(?s)(<({_MEDIA_TAG_NAME_RE})[^>]*/>)', webpage)]
          media_tags.extend(re.findall(
              # We only allow video|audio followed by a whitespace or '>'.
              # Allowing more characters may end up in significant slow down (see
              # https://github.com/ytdl-org/youtube-dl/issues/11979,
              # e.g. http://www.porntrex.com/maps/videositemap.xml).
          media_tags.extend(re.findall(
              # We only allow video|audio followed by a whitespace or '>'.
              # Allowing more characters may end up in significant slow down (see
              # https://github.com/ytdl-org/youtube-dl/issues/11979,
              # e.g. http://www.porntrex.com/maps/videositemap.xml).
-            r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
+            rf'(?s)(<(?P<tag>{_MEDIA_TAG_NAME_RE})(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
          for media_tag, _, media_type, media_content in media_tags:
              media_info = {
                  'formats': [],
          for media_tag, _, media_type, media_content in media_tags:
              media_info = {
                  'formats': [],
@@ -3213,13 +3346,13 @@ def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native
          mobj = re.search(
              r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
          url_base = mobj.group('url')
          mobj = re.search(
              r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
          url_base = mobj.group('url')
-        http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
+        http_base_url = '{}{}:{}'.format('http', mobj.group('s') or '', url_base)
          formats = []
  
          def manifest_url(manifest):
              m_url = f'{http_base_url}/{manifest}'
              if query:
          formats = []
  
          def manifest_url(manifest):
              m_url = f'{http_base_url}/{manifest}'
              if query:
-                m_url += '?%s' % query
+                m_url += f'?{query}'
              return m_url
  
          if 'm3u8' not in skip_protocols:
              return m_url
  
          if 'm3u8' not in skip_protocols:
@@ -3241,7 +3374,7 @@ def manifest_url(manifest):
                      video_id, fatal=False)
                  for rtmp_format in rtmp_formats:
                      rtsp_format = rtmp_format.copy()
                      video_id, fatal=False)
                  for rtmp_format in rtmp_formats:
                      rtsp_format = rtmp_format.copy()
-                    rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+                    rtsp_format['url'] = '{}/{}'.format(rtmp_format['url'], rtmp_format['play_path'])
                      del rtsp_format['play_path']
                      del rtsp_format['ext']
                      rtsp_format.update({
                      del rtsp_format['play_path']
                      del rtsp_format['ext']
                      rtsp_format.update({
@@ -3261,23 +3394,16 @@ def manifest_url(manifest):
          return formats
  
      def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
          return formats
  
      def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
-        mobj = re.search(
-            r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
-            webpage)
-        if mobj:
-            try:
-                jwplayer_data = self._parse_json(mobj.group('options'),
-                                                 video_id=video_id,
-                                                 transform_source=transform_source)
-            except ExtractorError:
-                pass
-            else:
-                if isinstance(jwplayer_data, dict):
-                    return jwplayer_data
-
-    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+        return self._search_json(
+            r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
+            webpage, 'JWPlayer data', video_id,
+            # must be a {...} or sequence, ending
+            contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
+            transform_source=transform_source, default=None)
+
+    def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
          jwplayer_data = self._find_jwplayer_data(
          jwplayer_data = self._find_jwplayer_data(
-            webpage, video_id, transform_source=js_to_json)
+            webpage, video_id, transform_source=transform_source)
          return self._parse_jwplayer_data(
              jwplayer_data, video_id, *args, **kwargs)
  
          return self._parse_jwplayer_data(
              jwplayer_data, video_id, *args, **kwargs)
  
@@ -3309,22 +3435,14 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
                  mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
  
              subtitles = {}
                  mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
  
              subtitles = {}
-            tracks = video_data.get('tracks')
-            if tracks and isinstance(tracks, list):
-                for track in tracks:
-                    if not isinstance(track, dict):
-                        continue
-                    track_kind = track.get('kind')
-                    if not track_kind or not isinstance(track_kind, str):
-                        continue
-                    if track_kind.lower() not in ('captions', 'subtitles'):
-                        continue
-                    track_url = urljoin(base_url, track.get('file'))
-                    if not track_url:
-                        continue
-                    subtitles.setdefault(track.get('label') or 'en', []).append({
-                        'url': self._proto_relative_url(track_url)
-                    })
+            for track in traverse_obj(video_data, (
+                    'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
+                track_url = urljoin(base_url, track.get('file'))
+                if not track_url:
+                    continue
+                subtitles.setdefault(track.get('label') or 'en', []).append({
+                    'url': self._proto_relative_url(track_url),
+                })
  
              entry = {
                  'id': this_video_id,
  
              entry = {
                  'id': this_video_id,
@@ -3402,14 +3520,14 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                      'tbr': int_or_none(source.get('bitrate'), scale=1000),
                      'filesize': int_or_none(source.get('filesize')),
                      'ext': ext,
                      'tbr': int_or_none(source.get('bitrate'), scale=1000),
                      'filesize': int_or_none(source.get('filesize')),
                      'ext': ext,
-                    'format_id': format_id
+                    'format_id': format_id,
                  }
                  if source_url.startswith('rtmp'):
                      a_format['ext'] = 'flv'
                      # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
                      # of jwplayer.flash.swf
                      rtmp_url_parts = re.split(
                  }
                  if source_url.startswith('rtmp'):
                      a_format['ext'] = 'flv'
                      # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
                      # of jwplayer.flash.swf
                      rtmp_url_parts = re.split(
-                        r'((?:mp4|mp3|flv):)', source_url, 1)
+                        r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
                      if len(rtmp_url_parts) == 3:
                          rtmp_url, prefix, play_path = rtmp_url_parts
                          a_format.update({
                      if len(rtmp_url_parts) == 3:
                          rtmp_url, prefix, play_path = rtmp_url_parts
                          a_format.update({
@@ -3476,7 +3594,7 @@ def _apply_first_set_cookie_header(self, url_handle, cookie):
                  continue
              cookies = cookies.encode('iso-8859-1').decode('utf-8')
              cookie_value = re.search(
                  continue
              cookies = cookies.encode('iso-8859-1').decode('utf-8')
              cookie_value = re.search(
-                r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+                rf'{cookie}=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)', cookies)
              if cookie_value:
                  value, domain = cookie_value.groups()
                  self._set_cookie(domain, cookie, value)
              if cookie_value:
                  value, domain = cookie_value.groups()
                  self._set_cookie(domain, cookie, value)
@@ -3560,7 +3678,7 @@ def description(cls, *, markdown=True, search_examples=None):
              desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
  
          # Escape emojis. Ref: https://github.com/github/markup/issues/1153
              desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
  
          # Escape emojis. Ref: https://github.com/github/markup/issues/1153
-        name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
+        name = (' - **{}**'.format(re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME))) if markdown else cls.IE_NAME
          return f'{name}:{desc}' if desc else name
  
      def extract_subtitles(self, *args, **kwargs):
          return f'{name}:{desc}' if desc else name
  
      def extract_subtitles(self, *args, **kwargs):
@@ -3600,7 +3718,7 @@ def extractor():
              self.to_screen(f'Extracted {comment_count} comments')
              return {
                  'comments': comments,
              self.to_screen(f'Extracted {comment_count} comments')
              return {
                  'comments': comments,
-                'comment_count': None if interrupted else comment_count
+                'comment_count': None if interrupted else comment_count,
              }
          return extractor
  
              }
          return extractor
  
@@ -3704,9 +3822,9 @@ def _extract_chapters_from_description(self, description, duration):
  
      @staticmethod
      def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
  
      @staticmethod
      def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
-        all_known = all(map(
-            lambda x: x is not None,
-            (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
+        all_known = all(
+            x is not None for x in
+            (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted))
          return (
              'private' if is_private
              else 'premium_only' if needs_premium
          return (
              'private' if is_private
              else 'premium_only' if needs_premium
@@ -3826,7 +3944,7 @@ class SearchInfoExtractor(InfoExtractor):
  
      @classproperty
      def _VALID_URL(cls):
  
      @classproperty
      def _VALID_URL(cls):
-        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
+        return rf'{cls._SEARCH_KEY}(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)'
  
      def _real_extract(self, query):
          prefix, query = self._match_valid_url(query).group('prefix', 'query')
  
      def _real_extract(self, query):
          prefix, query = self._match_valid_url(query).group('prefix', 'query')