import os
import random
import re
-import socket
-import ssl
import sys
import time
import math
from ..compat import (
compat_cookiejar_Cookie,
- compat_cookies,
+ compat_cookies_SimpleCookie,
compat_etree_Element,
compat_etree_fromstring,
compat_getpass,
- compat_integer_types,
compat_http_client,
compat_os_name,
compat_str,
js_to_json,
JSON_LD_RE,
mimetype2ext,
+ network_exceptions,
orderedSet,
parse_bitrate,
parse_codecs,
str_or_none,
str_to_int,
strip_or_none,
+ traverse_obj,
unescapeHTML,
unified_strdate,
unified_timestamp,
urljoin,
url_basename,
url_or_none,
+ variadic,
xpath_element,
xpath_text,
xpath_with_ns,
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
- "http", "https", "rtsp", "rtmp", "rtmpe",
+ "http", "https", "rtsp", "rtmp", "rtmp_ffmpeg", "rtmpe",
"m3u8", "m3u8_native" or "http_dash_segments".
* fragment_base_url
Base URL for fragments. Each fragment's path
(HTTP or RTMP) download. Boolean.
* downloader_options A dictionary of downloader options as
described in FileDownloader
+ RTMP formats can also have the additional fields: page_url,
+ app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
+ rtmp_protocol, rtmp_real_time
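For illustration (hypothetical values), an RTMP format entry might
look like: {'url': 'rtmp://example.com/app', 'play_path': 'mp4:video',
'page_url': 'https://example.com/watch', 'protocol': 'rtmp', 'ext': 'flv'}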
url: Final video URL.
ext: Video filename extension.
* "resolution" (optional, string "{width}x{height}",
deprecated)
* "filesize" (optional, int)
+ * "_test_url" (optional, bool) - If true, test the URL
thumbnail: Full URL to a video thumbnail image.
description: Full video description.
uploader: Full name of the video uploader.
license: License name the video is licensed under.
creator: The creator of the video.
+ release_timestamp: UNIX timestamp of the moment the video was released.
release_date: The date (YYYYMMDD) when the video was released.
- timestamp: UNIX timestamp of the moment the video became available.
+ timestamp: UNIX timestamp of the moment the video was uploaded
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
+ It can optionally also have:
+ * "name": Name or description of the subtitles
"ext" will be calculated from URL if missing
- automatic_captions: Like 'subtitles', used by the YoutubeIE for
- automatically generated captions
+ automatic_captions: Like 'subtitles'; contains automatically generated
+ captions instead of normal subtitles
duration: Length of the video in seconds, as an integer or float.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
categories: A list of categories that the video falls in, for example
["Sports", "Berlin"]
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+ cast: A list of the video cast
is_live: True, False, or None (=unknown). Whether this video is a
live stream that goes on instead of a fixed-length video.
was_live: True, False, or None (=unknown). Whether this video was
originally a live stream.
+ live_status: 'is_live', 'is_upcoming', 'was_live', 'not_live' or None (=unknown)
+ If absent, automatically set from is_live, was_live
start_time: Time in seconds where the reproduction should start, as
specified in the URL.
end_time: Time in seconds where the reproduction should end, as
playable_in_embed: Whether this video is allowed to play in embedded
players on other sites. Can be True (=always allowed),
False (=never allowed), None (=unknown), or a string
- specifying the criteria for embedability (Eg: 'whitelist').
+ specifying the criteria for embedability (Eg: 'whitelist')
+ availability: Under what condition the video is available. One of
+ 'private', 'premium_only', 'subscriber_only', 'needs_auth',
+ 'unlisted' or 'public'. Use 'InfoExtractor._availability'
+ to set it
__post_extractor: A function to be called just before the metadata is
written to either disk, logger or console. The function
must return a dict which will be added to the info_dict.
_GEO_IP_BLOCKS = None
_WORKING = True
+ _LOGIN_HINTS = {
+ 'any': 'Use --cookies, --username and --password or --netrc to provide account credentials',
+ 'cookies': (
+ 'Use --cookies for the authentication. '
+ 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to pass cookies'),
+ 'password': 'Use --username and --password or --netrc to provide account credentials',
+ }
+
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""
self._ready = False
if not self._x_forwarded_for_ip:
# Geo bypass mechanism is explicitly disabled by user
- if not self._downloader.params.get('geo_bypass', True):
+ if not self.get_param('geo_bypass', True):
return
if not geo_bypass_context:
# Explicit IP block specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
- ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+ ip_block = self.get_param('geo_bypass_ip_block', None)
# Otherwise use random IP block from geo bypass context but only
# if extractor is known as geo bypassable
if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s as X-Forwarded-For.'
- % self._x_forwarded_for_ip)
+ self._downloader.write_debug(
+ 'Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip)
return
# Path 2: bypassing based on country code
# Explicit country code specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
- country = self._downloader.params.get('geo_bypass_country', None)
+ country = self.get_param('geo_bypass_country', None)
# Otherwise use random country code from geo bypass context but
# only if extractor is known as geo bypassable
if country:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country.upper()))
+ self._downloader.write_debug(
+ 'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper()))
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
for _ in range(2):
try:
self.initialize()
+ self.write_debug('Extracting URL: %s' % url)
ie_result = self._real_extract(url)
+ if ie_result is None:
+ return None
if self._x_forwarded_for_ip:
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ subtitles = ie_result.get('subtitles')
+ if (subtitles and 'live_chat' in subtitles
+ and 'no-live-chat' in self.get_param('compat_opts', [])):
+ del subtitles['live_chat']
return ie_result
except GeoRestrictedError as e:
if self.__maybe_fake_ip_and_retry(e.countries):
continue
raise
def __maybe_fake_ip_and_retry(self, countries):
- if (not self._downloader.params.get('geo_bypass_country', None)
+ if (not self.get_param('geo_bypass_country', None)
and self._GEO_BYPASS
- and self._downloader.params.get('geo_bypass', True)
+ and self.get_param('geo_bypass', True)
and not self._x_forwarded_for_ip
and countries):
country_code = random.choice(countries)
assert isinstance(err, compat_urllib_error.HTTPError)
if expected_status is None:
return False
- if isinstance(expected_status, compat_integer_types):
- return err.code == expected_status
- elif isinstance(expected_status, (list, tuple)):
- return err.code in expected_status
elif callable(expected_status):
return expected_status(err.code) is True
else:
- assert False
+ return err.code in variadic(expected_status)
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
"""
See _download_webpage docstring for arguments specification.
"""
if not self._downloader._first_webpage_request:
- sleep_interval = float_or_none(self._downloader.params.get('sleep_interval_requests')) or 0
+ sleep_interval = float_or_none(self.get_param('sleep_interval_requests')) or 0
if sleep_interval > 0:
self.to_screen('Sleeping %s seconds ...' % sleep_interval)
time.sleep(sleep_interval)
url_or_request = update_url_query(url_or_request, query)
if data is not None or headers:
url_or_request = sanitized_Request(url_or_request, data, headers)
- exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
- if hasattr(ssl, 'CertificateError'):
- exceptions.append(ssl.CertificateError)
try:
return self._downloader.urlopen(url_or_request)
- except tuple(exceptions) as err:
+ except network_exceptions as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
- self._downloader.report_warning(errmsg)
+ self.report_warning(errmsg)
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
webpage_bytes = prefix + webpage_bytes
if not encoding:
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
- if self._downloader.params.get('dump_intermediate_pages', False):
+ if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
- if self._downloader.params.get('write_pages', False):
+ if self.get_param('write_pages', False):
basen = '%s_%s' % (video_id, urlh.geturl())
if len(basen) > 240:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
else:
self.report_warning(errmsg + str(ve))
- def report_warning(self, msg, video_id=None):
+ def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True):
+ return self._parse_json(
+ data[data.find('{'):data.rfind('}') + 1],
+ video_id, transform_source, fatal)
+
+ def _download_socket_json_handle(
+ self, url_or_request, video_id, note='Polling socket',
+ errnote='Unable to poll socket', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (JSON object, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ webpage, urlh = res
+ return self._parse_socket_response_as_json(
+ webpage, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_socket_json(
+ self, url_or_request, video_id, note='Polling socket',
+ errnote='Unable to poll socket', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return the JSON object as a dict.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_socket_json_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
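# A minimal usage sketch (hypothetical polling URL and response field):
#   data = self._download_socket_json(
#       'https://example.com/socket/poll', video_id, fatal=False)
#   if data is not False and data.get('done'):
#       ...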
+ def report_warning(self, msg, video_id=None, *args, **kwargs):
idstr = '' if video_id is None else '%s: ' % video_id
self._downloader.report_warning(
- '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+ '[%s] %s%s' % (self.IE_NAME, idstr, msg), *args, **kwargs)
- def to_screen(self, msg):
+ def to_screen(self, msg, *args, **kwargs):
"""Print msg to screen, prefixing it with '[ie_name]'"""
- self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+ self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+
+ def write_debug(self, msg, *args, **kwargs):
+ self._downloader.write_debug('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+
+ def get_param(self, name, default=None, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.params.get(name, default, *args, **kwargs)
+ return default
def report_extraction(self, id_or_name):
"""Report information extraction."""
"""Report attempt to log in."""
self.to_screen('Logging in')
- @staticmethod
- def raise_login_required(msg='This video is only available for registered users'):
- raise ExtractorError(
- '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
- expected=True)
+ def raise_login_required(
+ self, msg='This video is only available for registered users',
+ metadata_available=False, method='any'):
+ if metadata_available and self.get_param('ignore_no_formats_error'):
+ self.report_warning(msg)
+ return
+ if method is not None:
+ msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
+ raise ExtractorError(msg, expected=True)
+
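# Illustrative call (not from the original source): an extractor that can
# still return metadata without credentials might use:
#   self.raise_login_required(
#       'This video requires a login', metadata_available=True, method='cookies')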
+ def raise_geo_restricted(
+ self, msg='This video is not available from your location due to geo restriction',
+ countries=None, metadata_available=False):
+ if metadata_available and self.get_param('ignore_no_formats_error'):
+ self.report_warning(msg)
+ else:
+ raise GeoRestrictedError(msg, countries=countries)
- @staticmethod
- def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
- raise GeoRestrictedError(msg, countries=countries)
+ def raise_no_formats(self, msg, expected=False, video_id=None):
+ if expected and self.get_param('ignore_no_formats_error'):
+ self.report_warning(msg, video_id)
+ else:
+ raise ExtractorError(msg, expected=expected, video_id=video_id)
# Methods for following #608
@staticmethod
if mobj:
break
- if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
+ if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
if group is None:
# return the first matching group
return next(g for g in mobj.groups() if g is not None)
+ elif isinstance(group, (list, tuple)):
+ return tuple(mobj.group(g) for g in group)
else:
return mobj.group(group)
elif default is not NO_DEFAULT:
elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name)
else:
- self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
+ self.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
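# Illustrative use of the list/tuple 'group' form added above
# (hypothetical pattern):
#   title, video_id = self._search_regex(
#       r'"title":"(?P<title>[^"]+)","id":"(?P<id>\d+)"',
#       webpage, 'video data', group=('title', 'id'))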
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
password = None
netrc_machine = netrc_machine or self._NETRC_MACHINE
- if self._downloader.params.get('usenetrc', False):
+ if self.get_param('usenetrc', False):
try:
info = netrc.netrc().authenticators(netrc_machine)
if info is not None:
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
except (IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning(
+ self.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
return username, password
value.
If there's no info available, return (None, None)
"""
- if self._downloader is None:
- return (None, None)
-
- downloader_params = self._downloader.params
# Attempt to use provided username and password or .netrc data
- if downloader_params.get(username_option) is not None:
- username = downloader_params[username_option]
- password = downloader_params[password_option]
+ username = self.get_param(username_option)
+ if username is not None:
+ password = self.get_param(password_option)
else:
username, password = self._get_netrc_login_info(netrc_machine)
currently just uses the command line option
If there's no info available, return None
"""
- if self._downloader is None:
- return None
- downloader_params = self._downloader.params
- if downloader_params.get('twofactor') is not None:
- return downloader_params['twofactor']
+ tfa = self.get_param('twofactor')
+ if tfa is not None:
+ return tfa
return compat_getpass('Type %s and press [Return]: ' % note)
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
def _og_search_property(self, prop, html, name=None, **kargs):
- if not isinstance(prop, (list, tuple)):
- prop = [prop]
+ prop = variadic(prop)
if name is None:
name = 'OpenGraph %s' % prop[0]
og_regexes = []
return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
- if not isinstance(name, (list, tuple)):
- name = [name]
+ name = variadic(name)
if display_name is None:
display_name = name[0]
return self._html_search_regex(
# JSON-LD may be malformed and thus `fatal` should be respected.
# At the same time `default` may be passed that assumes `fatal=False`
# for _search_regex. Let's simulate the same behavior here as well.
- fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+ fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
json_ld = []
for mobj in json_ld_list:
json_ld_item = self._parse_json(
elif fatal:
raise RegexNotFoundError('Unable to extract JSON-LD')
else:
- self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+ self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
+ author = e.get('author')
info.update({
'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
- 'uploader': str_or_none(e.get('author')),
+ # author can be an instance of 'Organization' or 'Person' types.
+ # both types can have a 'name' property (inherited from the 'Thing' type). [1]
+ # however some websites are using 'Text' type instead.
+ # 1. https://schema.org/VideoObject
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
'filesize': float_or_none(e.get('contentSize')),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
return self._hidden_inputs(form)
class FormatSort:
- regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<seperator>[~:])(?P<limit>.*?))?)? *$'
+ regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
- default = ('hidden', 'hasvid', 'ie_pref', 'lang', 'quality',
+ default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
- 'proto', 'ext', 'has_audio', 'source', 'format_id') # These must not be aliases
+ 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases
+ ytdl_default = ('hasaud', 'quality', 'tbr', 'filesize', 'vbr',
+ 'height', 'width', 'proto', 'vext', 'abr', 'aext',
+ 'fps', 'fs_approx', 'source', 'format_id')
settings = {
'vcodec': {'type': 'ordered', 'regex': True,
'acodec': {'type': 'ordered', 'regex': True,
'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']},
'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
- 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']},
+ 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']},
'vext': {'type': 'ordered', 'field': 'video_ext',
'order': ('mp4', 'webm', 'flv', '', 'none'),
'order_free': ('webm', 'mp4', 'flv', '', 'none')},
'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
+ 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
+ 'field': ('vcodec', 'acodec'),
+ 'function': lambda it: int(any(v != 'none' for v in it))},
'ie_pref': {'priority': True, 'type': 'extractor'},
'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
- 'lang': {'priority': True, 'convert': 'ignore', 'type': 'extractor', 'field': 'language_preference'},
- 'quality': {'convert': 'float_none', 'type': 'extractor'},
+ 'lang': {'priority': True, 'convert': 'ignore', 'field': 'language_preference'},
+ 'quality': {'convert': 'float_none', 'default': -1},
'filesize': {'convert': 'bytes'},
'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
'id': {'convert': 'string', 'field': 'format_id'},
'vbr': {'convert': 'float_none'},
'abr': {'convert': 'float_none'},
'asr': {'convert': 'float_none'},
- 'source': {'convert': 'ignore', 'type': 'extractor', 'field': 'source_preference'},
+ 'source': {'convert': 'ignore', 'field': 'source_preference'},
'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
'ext': {'type': 'combined', 'field': ('vext', 'aext')},
- 'res': {'type': 'multiple', 'field': ('height', 'width'), 'function': min},
+ 'res': {'type': 'multiple', 'field': ('height', 'width'),
+ 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
# Most of these exist only for compatibility reasons
'dimension': {'type': 'alias', 'field': 'res'},
elif key == 'convert':
default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
else:
- default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,), 'function': max}.get(key, None)
+ default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
propObj[key] = default
return propObj[key]
if self._get_field_setting(field, 'type') == 'alias':
field = self._get_field_setting(field, 'field')
reverse = match.group('reverse') is not None
- closest = match.group('seperator') == '~'
+ closest = match.group('separator') == '~'
limit_text = match.group('limit')
has_limit = limit_text is not None
else limits[0] if has_limit and not has_multiple_limits
else None)
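# Illustrative parses per the regex above: 'res:480' gives field='res',
# separator=':', limit='480'; a '+' prefix (e.g. '+res') sets reverse;
# 'res~720' uses '~', setting 'closest', i.e. prefer the value nearest
# the limit.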
- def print_verbose_info(self, to_screen):
- to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user))
+ def print_verbose_info(self, write_debug):
+ if self._sort_user:
+ write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
if self._sort_extractor:
- to_screen('[debug] Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
- to_screen('[debug] Formats sorted by: %s' % ', '.join(['%s%s%s' % (
+ write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
+ write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
'+' if self._get_field_setting(field, 'reverse') else '', field,
'%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
self._get_field_setting(field, 'limit_text'),
value = self._resolve_field_value(field, value, True)
# try to convert to number
- val_num = float_or_none(value)
+ val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
if is_num:
value = val_num
type = 'field' # Only 'field' is allowed in multiple for now
actual_fields = self._get_field_setting(field, 'field')
- def wrapped_function(values):
- values = tuple(filter(lambda x: x is not None, values))
- return (self._get_field_setting(field, 'function')(*values) if len(values) > 1
- else values[0] if values
- else None)
-
- value = wrapped_function((get_value(f) for f in actual_fields))
+ value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
else:
value = get_value(field)
return self._calculate_field_preference_from_value(format, field, type, value)
if not format.get('ext') and 'url' in format:
format['ext'] = determine_ext(format['url'])
if format.get('vcodec') == 'none':
- format['audio_ext'] = format['ext']
+ format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
format['video_ext'] = 'none'
else:
format['video_ext'] = format['ext']
def _sort_formats(self, formats, field_preference=[]):
if not formats:
+ if self.get_param('ignore_no_formats_error'):
+ return
raise ExtractorError('No video formats found')
format_sort = self.FormatSort() # params and to_screen are taken from the downloader
format_sort.evaluate_params(self._downloader.params, field_preference)
- if self._downloader.params.get('verbose', False):
- format_sort.print_verbose_info(self._downloader.to_screen)
+ if self.get_param('verbose', False):
+ format_sort.print_verbose_info(self._downloader.write_debug)
formats.sort(key=lambda f: format_sort.calculate_preference(f))
def _check_formats(self, formats, video_id):
""" Either "http:" or "https:", depending on the user's preferences """
return (
'http:'
- if self._downloader.params.get('prefer_insecure', False)
+ if self.get_param('prefer_insecure', False)
else 'https:')
def _proto_relative_url(self, url, scheme=None):
'format_note': 'Quality selection URL',
}
- def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
- entry_protocol='m3u8', preference=None, quality=None,
- m3u8_id=None, note=None, errnote=None,
- fatal=True, live=False, data=None, headers={},
- query={}):
+ def _extract_m3u8_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the HLS manifest; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
+
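# Illustrative call for extractors that also want the subtitle tracks:
#   formats, subtitles = self._extract_m3u8_formats_and_subtitles(
#       m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)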
+ def _extract_m3u8_formats_and_subtitles(
+ self, m3u8_url, video_id, ext=None, entry_protocol='m3u8_native',
+ preference=None, quality=None, m3u8_id=None, note=None,
+ errnote=None, fatal=True, live=False, data=None, headers={},
+ query={}):
+
res = self._download_webpage_handle(
m3u8_url, video_id,
- note=note or 'Downloading m3u8 information',
- errnote=errnote or 'Failed to download m3u8 information',
+ note='Downloading m3u8 information' if note is None else note,
+ errnote='Failed to download m3u8 information' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
- return []
+ return [], {}
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
- return self._parse_m3u8_formats(
+ return self._parse_m3u8_formats_and_subtitles(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
preference=preference, quality=quality, m3u8_id=m3u8_id,
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
headers=headers, query=query, video_id=video_id)
- def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
- entry_protocol='m3u8', preference=None, quality=None,
- m3u8_id=None, live=False, note=None, errnote=None,
- fatal=True, data=None, headers={}, query={}, video_id=None):
- if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
- return []
-
- if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
- return []
-
- formats = []
+ def _parse_m3u8_formats_and_subtitles(
+ self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8_native',
+ preference=None, quality=None, m3u8_id=None, live=False, note=None,
+ errnote=None, fatal=True, data=None, headers={}, query={},
+ video_id=None):
+ formats, subtitles = [], {}
- format_url = lambda u: (
- u
- if re.match(r'^https?://', u)
- else compat_urlparse.urljoin(m3u8_url, u))
+ if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
+ return formats, subtitles
+
+ if (not self.get_param('allow_unplayable_formats')
+ and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
+ return formats, subtitles
+
+ def format_url(url):
+ return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
+
+ if self.get_param('hls_split_discontinuity', False):
+ def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
+ if not m3u8_doc:
+ if not manifest_url:
+ return []
+ m3u8_doc = self._download_webpage(
+ manifest_url, video_id, fatal=fatal, data=data, headers=headers,
+ note=False, errnote='Failed to download m3u8 playlist information')
+ if m3u8_doc is False:
+ return []
+ return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
- split_discontinuity = self._downloader.params.get('hls_split_discontinuity', False)
+ else:
+ def _extract_m3u8_playlist_indices(*args, **kwargs):
+ return [None]
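# For illustration: a media playlist containing two '#EXT-X-DISCONTINUITY'
# tags yields range(3), i.e. one format per discontinuity-delimited
# section; without the option, the single index None is used.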
# References:
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
# media playlist and MUST NOT appear in master playlist thus we can
# clearly detect media playlist with this criterion.
- def _extract_m3u8_playlist_formats(format_url=None, m3u8_doc=None, video_id=None,
- fatal=True, data=None, headers={}):
- if not m3u8_doc:
- if not format_url:
- return []
- res = self._download_webpage_handle(
- format_url, video_id,
- note=False,
- errnote='Failed to download m3u8 playlist information',
- fatal=fatal, data=data, headers=headers)
-
- if res is False:
- return []
-
- m3u8_doc, urlh = res
- format_url = urlh.geturl()
-
- playlist_formats = []
- i = (
- 0
- if split_discontinuity
- else None)
- format_info = {
- 'index': i,
- 'key_data': None,
- 'files': [],
- }
- for line in m3u8_doc.splitlines():
- if not line.startswith('#'):
- format_info['files'].append(line)
- elif split_discontinuity and line.startswith('#EXT-X-DISCONTINUITY'):
- i += 1
- playlist_formats.append(format_info)
- format_info = {
- 'index': i,
- 'url': format_url,
- 'files': [],
- }
- playlist_formats.append(format_info)
- return playlist_formats
-
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
+ formats = [{
+ 'format_id': '-'.join(map(str, filter(None, [m3u8_id, idx]))),
+ 'format_index': idx,
+ 'url': m3u8_url,
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ 'quality': quality,
+ } for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
- playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc=m3u8_doc)
-
- for format in playlist_formats:
- format_id = []
- if m3u8_id:
- format_id.append(m3u8_id)
- format_index = format.get('index')
- if format_index:
- format_id.append(str(format_index))
- f = {
- 'format_id': '-'.join(format_id),
- 'format_index': format_index,
- 'url': m3u8_url,
- 'ext': ext,
- 'protocol': entry_protocol,
- 'preference': preference,
- 'quality': quality,
- }
- formats.append(f)
-
- return formats
+ return formats, subtitles
groups = {}
last_stream_inf = {}
if not (media_type and group_id and name):
return
groups.setdefault(group_id, []).append(media)
+ # <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
+ if media_type == 'SUBTITLES':
+ # According to RFC 8216 §4.3.4.2.1, URI is REQUIRED in the
+ # EXT-X-MEDIA tag if the media type is SUBTITLES.
+ # However, lack of URI has been spotted in the wild.
+ # e.g. NebulaIE; see https://github.com/yt-dlp/yt-dlp/issues/339
+ if not media.get('URI'):
+ return
+ url = format_url(media['URI'])
+ sub_info = {
+ 'url': url,
+ 'ext': determine_ext(url),
+ }
+ if sub_info['ext'] == 'm3u8':
+ # Per RFC 8216 §3.1, the only possible subtitle format m3u8
+ # files may contain is WebVTT:
+ # <https://tools.ietf.org/html/rfc8216#section-3.1>
+ sub_info['ext'] = 'vtt'
+ sub_info['protocol'] = 'm3u8_native'
+ lang = media.get('LANGUAGE') or 'und'
+ subtitles.setdefault(lang, []).append(sub_info)
if media_type not in ('VIDEO', 'AUDIO'):
return
media_url = media.get('URI')
if media_url:
manifest_url = format_url(media_url)
- format_id = []
- playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
- fatal=fatal, data=data, headers=headers)
-
- for format in playlist_formats:
- format_index = format.get('index')
- for v in (m3u8_id, group_id, name):
- if v:
- format_id.append(v)
- if format_index:
- format_id.append(str(format_index))
- f = {
- 'format_id': '-'.join(format_id),
- 'format_index': format_index,
- 'url': manifest_url,
- 'manifest_url': m3u8_url,
- 'language': media.get('LANGUAGE'),
- 'ext': ext,
- 'protocol': entry_protocol,
- 'preference': preference,
- 'quality': quality,
- }
- if media_type == 'AUDIO':
- f['vcodec'] = 'none'
- formats.append(f)
+ formats.extend({
+ 'format_id': '-'.join(map(str, filter(None, (m3u8_id, group_id, name, idx)))),
+ 'format_note': name,
+ 'format_index': idx,
+ 'url': manifest_url,
+ 'manifest_url': m3u8_url,
+ 'language': media.get('LANGUAGE'),
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ 'quality': quality,
+ 'vcodec': 'none' if media_type == 'AUDIO' else None,
+ } for idx in _extract_m3u8_playlist_indices(manifest_url))
def build_stream_name():
# Although the specification does not mention the NAME attribute for
or last_stream_inf.get('BANDWIDTH'), scale=1000)
manifest_url = format_url(line.strip())
- playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
- fatal=fatal, data=data, headers=headers)
-
- for frmt in playlist_formats:
- format_id = []
- if m3u8_id:
- format_id.append(m3u8_id)
- format_index = frmt.get('index')
- stream_name = build_stream_name()
+ for idx in _extract_m3u8_playlist_indices(manifest_url):
+ format_id = [m3u8_id, None, idx]
# Bandwidth of live streams may differ over time thus making
# format_id unpredictable. So it's better to keep provided
# format_id intact.
if not live:
- format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
- if format_index:
- format_id.append(str(format_index))
+ stream_name = build_stream_name()
+ format_id[1] = stream_name if stream_name else '%d' % (tbr if tbr else len(formats))
f = {
- 'format_id': '-'.join(format_id),
- 'format_index': format_index,
+ 'format_id': '-'.join(map(str, filter(None, format_id))),
+ 'format_index': idx,
'url': manifest_url,
'manifest_url': m3u8_url,
'tbr': tbr,
formats.append(http_f)
last_stream_inf = {}
- return formats
+ return formats, subtitles
@staticmethod
def _xpath_ns(path, namespace=None):
out.append('{%s}%s' % (namespace, c))
return '/'.join(out)
- def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+ def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
if smil is False:
namespace = self._parse_smil_namespace(smil)
- return self._parse_smil_formats(
+ fmts = self._parse_smil_formats(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+ subs = self._parse_smil_subtitles(
+ smil, namespace=namespace)
+
+ return fmts, subs
+
+ def _extract_smil_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the SMIL manifest; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
smil = self._download_smil(smil_url, video_id, fatal=fatal)
})
return entries
- def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ def _extract_mpd_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the DASH manifest; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
+
+ def _extract_mpd_formats_and_subtitles(
+ self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
+ fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle(
mpd_url, video_id,
- note=note or 'Downloading MPD manifest',
- errnote=errnote or 'Failed to download MPD manifest',
+ note='Downloading MPD manifest' if note is None else note,
+ errnote='Failed to download MPD manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
- return []
+ return [], {}
mpd_doc, urlh = res
if mpd_doc is None:
- return []
+ return [], {}
mpd_base_url = base_url(urlh.geturl())
- return self._parse_mpd_formats(
+ return self._parse_mpd_formats_and_subtitles(
mpd_doc, mpd_id, mpd_base_url, mpd_url)
- def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+ def _parse_mpd_formats(self, *args, **kwargs):
+ fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the DASH manifest; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
+
+ def _parse_mpd_formats_and_subtitles(
+ self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
"""
Parse formats from MPD manifest.
References:
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
"""
- if not self._downloader.params.get('dynamic_mpd'):
+ if not self.get_param('dynamic_mpd', True):
if mpd_doc.get('type') == 'dynamic':
- return []
+ return [], {}
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
extract_Initialization(segment_template)
return ms_info
- skip_unplayable = not self._downloader.params.get('allow_unplayable_formats')
+ skip_unplayable = not self.get_param('allow_unplayable_formats')
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
+ subtitles = {}
for period in mpd_doc.findall(_add_ns('Period')):
period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, {
representation_attrib.update(representation.attrib)
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
mime_type = representation_attrib['mimeType']
- content_type = mime_type.split('/')[0]
- if content_type == 'text':
- # TODO implement WebVTT downloading
- pass
- elif content_type in ('video', 'audio'):
+ content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
+
+ if content_type in ('video', 'audio', 'text') or mime_type == 'image/jpeg':
base_url = ''
for element in (representation, adaptation_set, period, mpd_doc):
base_url_e = element.find(_add_ns('BaseURL'))
url_el = representation.find(_add_ns('BaseURL'))
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
- f = {
- 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
- 'manifest_url': mpd_url,
- 'ext': mimetype2ext(mime_type),
- 'width': int_or_none(representation_attrib.get('width')),
- 'height': int_or_none(representation_attrib.get('height')),
- 'tbr': float_or_none(bandwidth, 1000),
- 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
- 'fps': int_or_none(representation_attrib.get('frameRate')),
- 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
- 'format_note': 'DASH %s' % content_type,
- 'filesize': filesize,
- 'container': mimetype2ext(mime_type) + '_dash',
- }
- f.update(parse_codecs(representation_attrib.get('codecs')))
+ if representation_id is not None:
+ format_id = representation_id
+ else:
+ format_id = content_type
+ if mpd_id:
+ format_id = mpd_id + '-' + format_id
+ if content_type in ('video', 'audio'):
+ f = {
+ 'format_id': format_id,
+ 'manifest_url': mpd_url,
+ 'ext': mimetype2ext(mime_type),
+ 'width': int_or_none(representation_attrib.get('width')),
+ 'height': int_or_none(representation_attrib.get('height')),
+ 'tbr': float_or_none(bandwidth, 1000),
+ 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+ 'fps': int_or_none(representation_attrib.get('frameRate')),
+ 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+ 'format_note': 'DASH %s' % content_type,
+ 'filesize': filesize,
+ 'container': mimetype2ext(mime_type) + '_dash',
+ }
+ f.update(parse_codecs(representation_attrib.get('codecs')))
+ elif content_type == 'text':
+ f = {
+ 'ext': mimetype2ext(mime_type),
+ 'manifest_url': mpd_url,
+ 'filesize': filesize,
+ }
+ elif mime_type == 'image/jpeg':
+ # See test case in VikiIE
+ # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
+ f = {
+ 'format_id': format_id,
+ 'ext': 'mhtml',
+ 'manifest_url': mpd_url,
+ 'format_note': 'DASH storyboards (jpeg)',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ }
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
def prepare_template(template_name, identifiers):
t += c
# Next, $...$ templates are translated to their
# %(...) counterparts to be used with % operator
- t = t.replace('$RepresentationID$', representation_id)
+ if representation_id is not None:
+ t = t.replace('$RepresentationID$', representation_id)
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
t = t.replace('$$', '$')
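# Illustrative translation (hypothetical media template): with
# identifiers ('Number', 'Bandwidth'),
#   'seg-$RepresentationID$-$Number%05d$.m4s'
# becomes
#   'seg-<representation_id>-%(Number)05d.m4s'
# ready for use with the % operator.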
'url': mpd_url or base_url,
'fragment_base_url': base_url,
'fragments': [],
- 'protocol': 'http_dash_segments',
+ 'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
})
if 'initialization_url' in representation_ms_info:
initialization_url = representation_ms_info['initialization_url']
else:
# Assuming direct URL to unfragmented media.
f['url'] = base_url
- formats.append(f)
+ if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
+ formats.append(f)
+ elif content_type == 'text':
+ subtitles.setdefault(lang or 'und', []).append(f)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
- return formats
-
- def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ return formats, subtitles
+
+ def _extract_ism_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the ISM manifest; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
+
+ def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle(
ism_url, video_id,
- note=note or 'Downloading ISM manifest',
- errnote=errnote or 'Failed to download ISM manifest',
+ note='Downloading ISM manifest' if note is None else note,
+ errnote='Failed to download ISM manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
- return []
+ return [], {}
ism_doc, urlh = res
if ism_doc is None:
- return []
+ return [], {}
- return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+ return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
- def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
"""
Parse formats from ISM manifest.
References:
https://msdn.microsoft.com/en-us/library/ff469518.aspx
"""
if ism_doc.get('IsLive') == 'TRUE':
- return []
- if (not self._downloader.params.get('allow_unplayable_formats')
+ return [], {}
+ if (not self.get_param('allow_unplayable_formats')
and ism_doc.find('Protection') is not None):
- return []
+ return [], {}
duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
formats = []
+ subtitles = {}
for stream in ism_doc.findall('StreamIndex'):
stream_type = stream.get('Type')
- if stream_type not in ('video', 'audio'):
+ if stream_type not in ('video', 'audio', 'text'):
continue
url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
+ stream_language = stream.get('Language', 'und')
for track in stream.findall('QualityLevel'):
- fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
+ fourcc = track.get('FourCC') or ('AACL' if track.get('AudioTag') == '255' else None)
# TODO: add support for WVC1 and WMAP
- if fourcc not in ('H264', 'AVC1', 'AACL'):
+ if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
format_id.append(stream_name)
format_id.append(compat_str(tbr))
- formats.append({
- 'format_id': '-'.join(format_id),
- 'url': ism_url,
- 'manifest_url': ism_url,
- 'ext': 'ismv' if stream_type == 'video' else 'isma',
- 'width': width,
- 'height': height,
- 'tbr': tbr,
- 'asr': sampling_rate,
- 'vcodec': 'none' if stream_type == 'audio' else fourcc,
- 'acodec': 'none' if stream_type == 'video' else fourcc,
- 'protocol': 'ism',
- 'fragments': fragments,
- '_download_params': {
- 'duration': duration,
- 'timescale': stream_timescale,
- 'width': width or 0,
- 'height': height or 0,
- 'fourcc': fourcc,
- 'codec_private_data': track.get('CodecPrivateData'),
- 'sampling_rate': sampling_rate,
- 'channels': int_or_none(track.get('Channels', 2)),
- 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
- 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
- },
- })
- return formats
+ if stream_type == 'text':
+ subtitles.setdefault(stream_language, []).append({
+ 'ext': 'ismt',
+ 'protocol': 'ism',
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'fragments': fragments,
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ }
+ })
+ elif stream_type in ('video', 'audio'):
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats, subtitles
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
def absolute_url(item_url):
entries.append(media_info)
return entries
- def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+ def _extract_akamai_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the manifests; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
+
+ def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
signed = 'hdnea=' in manifest_url
if not signed:
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
'', manifest_url).strip('?')
formats = []
+ subtitles = {}
hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
hls_host = hosts.get('hls')
if hls_host:
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
- m3u8_formats = self._extract_m3u8_formats(
+ m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subtitles)
http_host = hosts.get('http')
if http_host and m3u8_formats and not signed:
formats.append(http_f)
i += 1
- return formats
+ return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
if fatal:
raise ExtractorError(msg)
else:
- self._downloader.report_warning(msg)
+ self.report_warning(msg)
return res
def _float(self, v, name, fatal=False, **kwargs):
if fatal:
raise ExtractorError(msg)
else:
- self._downloader.report_warning(msg)
+ self.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
self._downloader.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):
- """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+ """ Return a compat_cookies_SimpleCookie with the cookies for the url """
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
- return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+ return compat_cookies_SimpleCookie(req.get_header('Cookie'))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
return not any_restricted
def extract_subtitles(self, *args, **kwargs):
- if (self._downloader.params.get('writesubtitles', False)
- or self._downloader.params.get('listsubtitles')):
+ if (self.get_param('writesubtitles', False)
+ or self.get_param('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
return ret
@classmethod
- def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
- """ Merge two subtitle dictionaries, language by language. """
- ret = dict(subtitle_dict1)
- for lang in subtitle_dict2:
- ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
- return ret
+ def _merge_subtitles(cls, *dicts, target=None):
+ """ Merge subtitle dictionaries, language by language. """
+ if target is None:
+ target = {}
+ for d in dicts:
+ for lang, subs in d.items():
+ target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
+ return target
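# Illustrative call (hypothetical variables):
#   self._merge_subtitles(hls_subs, dash_subs,
#                         target=info.setdefault('subtitles', {}))
# merges every language's entries into 'target' and returns it.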
def extract_automatic_captions(self, *args, **kwargs):
- if (self._downloader.params.get('writeautomaticsub', False)
- or self._downloader.params.get('listsubtitles')):
+ if (self.get_param('writeautomaticsub', False)
+ or self.get_param('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
raise NotImplementedError('This method must be implemented by subclasses')
def mark_watched(self, *args, **kwargs):
- if (self._downloader.params.get('mark_watched', False)
+ if (self.get_param('mark_watched', False)
and (self._get_login_info()[0] is not None
- or self._downloader.params.get('cookiefile') is not None)):
+ or self.get_param('cookiefile') is not None)):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
def geo_verification_headers(self):
headers = {}
- geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ geo_verification_proxy = self.get_param('geo_verification_proxy')
if geo_verification_proxy:
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers
def _generic_title(self, url):
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ @staticmethod
+ def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
+ all_known = all(map(
+ lambda x: x is not None,
+ (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
+ return (
+ 'private' if is_private
+ else 'premium_only' if needs_premium
+ else 'subscriber_only' if needs_subscription
+ else 'needs_auth' if needs_auth
+ else 'unlisted' if is_unlisted
+ else 'public' if all_known
+ else None)
+
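# Illustrative call (hypothetical field name):
#   'availability': self._availability(needs_premium=video.get('premium')),
# yields 'premium_only' when the flag is truthy, and None otherwise since
# the remaining flags are unknown.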
+ def _configuration_arg(self, key, default=NO_DEFAULT, casesense=False):
+ '''
+ @returns A list of values for the extractor argument given by "key"
+ or "default" if no such key is present
+ @param default The default value to return when the key is not present (default: [])
+ @param casesense When false, the values are converted to lower case
+ '''
+ val = traverse_obj(
+ self._downloader.params, ('extractor_args', self.ie_key().lower(), key))
+ if val is None:
+ return [] if default is NO_DEFAULT else default
+ return list(val) if casesense else [x.lower() for x in val]
+
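# Illustrative use (hypothetical values): with
# --extractor-args "youtube:player_client=android", code in YoutubeIE
# could read:
#   self._configuration_arg('player_client')  # -> ['android']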
class SearchInfoExtractor(InfoExtractor):
"""
if n <= 0:
raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
elif n > self._MAX_RESULTS:
- self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+ self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
n = self._MAX_RESULTS
return self._get_n_results(query, n)