Do not verify thumbnail URLs by default

[yt-dlp.git] / yt_dlp / extractor / common.py
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index b14cf0fc9ba75981dd8b9475044255dc41e14751..0a14f7c0d32b289a2797b54b9babfdb667dc5630 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -4,6 +4,7 @@
  import base64
  import datetime
  import hashlib
  import base64
  import datetime
  import hashlib
+import itertools
  import json
  import netrc
  import os
  import json
  import netrc
  import os
@@ -18,8 +19,8 @@
      compat_cookies_SimpleCookie,
      compat_etree_Element,
      compat_etree_fromstring,
      compat_cookies_SimpleCookie,
      compat_etree_Element,
      compat_etree_fromstring,
+    compat_expanduser,
      compat_getpass,
      compat_getpass,
-    compat_integer_types,
      compat_http_client,
      compat_os_name,
      compat_str,
      compat_http_client,
      compat_os_name,
      compat_str,
@@ -36,7 +37,6 @@
      remove_encrypted_media,
  )
  from ..utils import (
      remove_encrypted_media,
  )
  from ..utils import (
-    NO_DEFAULT,
      age_restricted,
      base_url,
      bug_reports_message,
      age_restricted,
      base_url,
      bug_reports_message,
@@ -46,10 +46,11 @@
      determine_protocol,
      dict_get,
      error_to_compat_str,
      determine_protocol,
      dict_get,
      error_to_compat_str,
-    ExtractorError,
      extract_attributes,
      extract_attributes,
+    ExtractorError,
      fix_xml_ampersands,
      float_or_none,
      fix_xml_ampersands,
      float_or_none,
+    format_field,
      GeoRestrictedError,
      GeoUtils,
      int_or_none,
      GeoRestrictedError,
      GeoUtils,
      int_or_none,
@@ -57,6 +58,7 @@
      JSON_LD_RE,
      mimetype2ext,
      network_exceptions,
      JSON_LD_RE,
      mimetype2ext,
      network_exceptions,
+    NO_DEFAULT,
      orderedSet,
      parse_bitrate,
      parse_codecs,
      orderedSet,
      parse_bitrate,
      parse_codecs,
@@ -65,19 +67,21 @@
      parse_m3u8_attributes,
      parse_resolution,
      RegexNotFoundError,
      parse_m3u8_attributes,
      parse_resolution,
      RegexNotFoundError,
-    sanitized_Request,
      sanitize_filename,
      sanitize_filename,
+    sanitized_Request,
      str_or_none,
      str_to_int,
      strip_or_none,
      str_or_none,
      str_to_int,
      strip_or_none,
+    traverse_obj,
      unescapeHTML,
      unified_strdate,
      unified_timestamp,
      update_Request,
      update_url_query,
      unescapeHTML,
      unified_strdate,
      unified_timestamp,
      update_Request,
      update_url_query,
-    urljoin,
      url_basename,
      url_or_none,
      url_basename,
      url_or_none,
+    urljoin,
+    variadic,
      xpath_element,
      xpath_text,
      xpath_with_ns,
      xpath_element,
      xpath_text,
      xpath_with_ns,
@@ -201,6 +205,7 @@ class InfoExtractor(object):
                                   width : height ratio as float.
                      * no_resume  The server does not support resuming the
                                   (HTTP or RTMP) download. Boolean.
                                   width : height ratio as float.
                      * no_resume  The server does not support resuming the
                                   (HTTP or RTMP) download. Boolean.
+                    * has_drm    The format has DRM and cannot be downloaded. Boolean.
                      * downloader_options  A dictionary of downloader options as
                                   described in FileDownloader
                      RTMP formats can also have the additional fields: page_url,
                      * downloader_options  A dictionary of downloader options as
                                   described in FileDownloader
                      RTMP formats can also have the additional fields: page_url,
@@ -295,6 +300,8 @@ class InfoExtractor(object):
                      live stream that goes on instead of a fixed-length video.
      was_live:       True, False, or None (=unknown). Whether this video was
                      originally a live stream.
                      live stream that goes on instead of a fixed-length video.
      was_live:       True, False, or None (=unknown). Whether this video was
                      originally a live stream.
+    live_status:    'is_live', 'is_upcoming', 'was_live', 'not_live' or None (=unknown).
+                    If absent, it is set automatically from is_live and was_live.
      start_time:     Time in seconds where the reproduction should start, as
                      specified in the URL.
      end_time:       Time in seconds where the reproduction should end, as
      start_time:     Time in seconds where the reproduction should start, as
                      specified in the URL.
      end_time:       Time in seconds where the reproduction should end, as
@@ -399,6 +406,10 @@ class InfoExtractor(object):
      _real_extract() methods and define a _VALID_URL regexp.
      Probably, they should also be added to the list of extractors.
  
      _real_extract() methods and define a _VALID_URL regexp.
      Probably, they should also be added to the list of extractors.
  
+    Subclasses may also override suitable() if necessary, but must preserve the
+    function signature and make sure the override imports everything it needs
+    (except other extractors), so that lazy_extractors works correctly.
+
      _GEO_BYPASS attribute may be set to False in order to disable
      geo restriction bypass mechanisms for a particular extractor.
      Though it won't disable explicit geo restriction bypass based on
      _GEO_BYPASS attribute may be set to False in order to disable
      geo restriction bypass mechanisms for a particular extractor.
      Though it won't disable explicit geo restriction bypass based on
@@ -414,7 +425,7 @@ class InfoExtractor(object):
      will be used by geo restriction bypass mechanism similarly
      to _GEO_COUNTRIES.
  
      will be used by geo restriction bypass mechanism similarly
      to _GEO_COUNTRIES.
  
-    Finally, the _WORKING attribute should be set to False for broken IEs
+    The _WORKING attribute should be set to False for broken IEs
      in order to warn the users and skip the tests.
      """
  
      in order to warn the users and skip the tests.
      """
  
@@ -429,8 +440,8 @@ class InfoExtractor(object):
      _LOGIN_HINTS = {
          'any': 'Use --cookies, --username and --password or --netrc to provide account credentials',
          'cookies': (
      _LOGIN_HINTS = {
          'any': 'Use --cookies, --username and --password or --netrc to provide account credentials',
          'cookies': (
-            'Use --cookies for the authentication. '
-            'See  https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl  for how to pass cookies'),
+            'Use --cookies-from-browser or --cookies for the authentication. '
+            'See  https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl  for how to manually pass cookies'),
          'password': 'Use --username and --password or --netrc to provide account credentials',
      }
  
          'password': 'Use --username and --password or --netrc to provide account credentials',
      }
  
@@ -438,26 +449,35 @@ def __init__(self, downloader=None):
          """Constructor. Receives an optional downloader."""
          self._ready = False
          self._x_forwarded_for_ip = None
          """Constructor. Receives an optional downloader."""
          self._ready = False
          self._x_forwarded_for_ip = None
+        self._printed_messages = set()
          self.set_downloader(downloader)
  
      @classmethod
          self.set_downloader(downloader)
  
      @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-
+    def _match_valid_url(cls, url):
          # This does not use has/getattr intentionally - we want to know whether
          # we have cached the regexp for *this* class, whereas getattr would also
          # match the superclass
          if '_VALID_URL_RE' not in cls.__dict__:
              cls._VALID_URL_RE = re.compile(cls._VALID_URL)
          # This does not use has/getattr intentionally - we want to know whether
          # we have cached the regexp for *this* class, whereas getattr would also
          # match the superclass
          if '_VALID_URL_RE' not in cls.__dict__:
              cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url) is not None
+        return cls._VALID_URL_RE.match(url)
+
+    @classmethod
+    def suitable(cls, url):
+        """Receives a URL and returns True if suitable for this IE."""
+        # This function must import everything it needs (except other extractors),
+        # so that lazy_extractors works correctly
+        return cls._match_valid_url(url) is not None
  
      @classmethod
      def _match_id(cls, url):
  
      @classmethod
      def _match_id(cls, url):
-        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        m = cls._VALID_URL_RE.match(url)
-        assert m
-        return compat_str(m.group('id'))
+        return cls._match_valid_url(url).group('id')
+
+    @classmethod
+    def get_temp_id(cls, url):
+        try:
+            return cls._match_id(url)
+        except (IndexError, AttributeError):
+            return None
  
      @classmethod
      def working(cls):
  
      @classmethod
      def working(cls):
@@ -466,6 +486,7 @@ def working(cls):
  
      def initialize(self):
          """Initializes an instance (authentication, etc)."""
  
      def initialize(self):
          """Initializes an instance (authentication, etc)."""
+        self._printed_messages = set()
          self._initialize_geo_bypass({
              'countries': self._GEO_COUNTRIES,
              'ip_blocks': self._GEO_IP_BLOCKS,
          self._initialize_geo_bypass({
              'countries': self._GEO_COUNTRIES,
              'ip_blocks': self._GEO_IP_BLOCKS,
@@ -579,12 +600,14 @@ def extract(self, url):
                      if self.__maybe_fake_ip_and_retry(e.countries):
                          continue
                      raise
                      if self.__maybe_fake_ip_and_retry(e.countries):
                          continue
                      raise
-        except ExtractorError:
-            raise
+        except ExtractorError as e:
+            video_id = e.video_id or self.get_temp_id(url)
+            raise ExtractorError(
+                e.msg, video_id=video_id, ie=self.IE_NAME, tb=e.traceback, expected=e.expected, cause=e.cause)
          except compat_http_client.IncompleteRead as e:
          except compat_http_client.IncompleteRead as e:
-            raise ExtractorError('A network error has occurred.', cause=e, expected=True)
+            raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
          except (KeyError, StopIteration) as e:
          except (KeyError, StopIteration) as e:
-            raise ExtractorError('An extractor error has occurred.', cause=e)
+            raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
  
      def __maybe_fake_ip_and_retry(self, countries):
          if (not self.get_param('geo_bypass_country', None)
  
      def __maybe_fake_ip_and_retry(self, countries):
          if (not self.get_param('geo_bypass_country', None)
@@ -616,7 +639,7 @@ def _real_extract(self, url):
      @classmethod
      def ie_key(cls):
          """A string for getting the InfoExtractor with get_info_extractor"""
      @classmethod
      def ie_key(cls):
          """A string for getting the InfoExtractor with get_info_extractor"""
-        return compat_str(cls.__name__[:-2])
+        return cls.__name__[:-2]
  
      @property
      def IE_NAME(self):
  
      @property
      def IE_NAME(self):
@@ -627,14 +650,10 @@ def __can_accept_status_code(err, expected_status):
          assert isinstance(err, compat_urllib_error.HTTPError)
          if expected_status is None:
              return False
          assert isinstance(err, compat_urllib_error.HTTPError)
          if expected_status is None:
              return False
-        if isinstance(expected_status, compat_integer_types):
-            return err.code == expected_status
-        elif isinstance(expected_status, (list, tuple)):
-            return err.code in expected_status
          elif callable(expected_status):
              return expected_status(err.code) is True
          else:
          elif callable(expected_status):
              return expected_status(err.code) is True
          else:
-            assert False
+            return err.code in variadic(expected_status)
  
      def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
          """
  
      def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
          """
@@ -774,9 +793,10 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
              self._downloader.to_screen(dump)
          if self.get_param('write_pages', False):
              basen = '%s_%s' % (video_id, urlh.geturl())
              self._downloader.to_screen(dump)
          if self.get_param('write_pages', False):
              basen = '%s_%s' % (video_id, urlh.geturl())
-            if len(basen) > 240:
+            trim_length = self.get_param('trim_file_name') or 240
+            if len(basen) > trim_length:
                  h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                  h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
-                basen = basen[:240 - len(h)] + h
+                basen = basen[:trim_length - len(h)] + h
              raw_filename = basen + '.dump'
              filename = sanitize_filename(raw_filename, restricted=True)
              self.to_screen('Saving request to ' + filename)
              raw_filename = basen + '.dump'
              filename = sanitize_filename(raw_filename, restricted=True)
              self.to_screen('Saving request to ' + filename)
@@ -999,10 +1019,14 @@ def _download_socket_json(
              expected_status=expected_status)
          return res if res is False else res[0]
  
              expected_status=expected_status)
          return res if res is False else res[0]
  
-    def report_warning(self, msg, video_id=None, *args, **kwargs):
-        idstr = '' if video_id is None else '%s: ' % video_id
-        self._downloader.report_warning(
-            '[%s] %s%s' % (self.IE_NAME, idstr, msg), *args, **kwargs)
+    def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
+        idstr = format_field(video_id, template='%s: ')
+        msg = f'[{self.IE_NAME}] {idstr}{msg}'
+        if only_once:
+            if f'WARNING: {msg}' in self._printed_messages:
+                return
+            self._printed_messages.add(f'WARNING: {msg}')
+        self._downloader.report_warning(msg, *args, **kwargs)
  
      def to_screen(self, msg, *args, **kwargs):
          """Print msg to screen, prefixing it with '[ie_name]'"""
  
      def to_screen(self, msg, *args, **kwargs):
          """Print msg to screen, prefixing it with '[ie_name]'"""
@@ -1016,6 +1040,9 @@ def get_param(self, name, default=None, *args, **kwargs):
              return self._downloader.params.get(name, default, *args, **kwargs)
          return default
  
              return self._downloader.params.get(name, default, *args, **kwargs)
          return default
  
+    def report_drm(self, video_id, partial=False):
+        self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
+
      def report_extraction(self, id_or_name):
          """Report information extraction."""
          self.to_screen('%s: Extracting information' % id_or_name)
      def report_extraction(self, id_or_name):
          """Report information extraction."""
          self.to_screen('%s: Extracting information' % id_or_name)
@@ -1037,7 +1064,9 @@ def raise_login_required(
              metadata_available=False, method='any'):
          if metadata_available and self.get_param('ignore_no_formats_error'):
              self.report_warning(msg)
              metadata_available=False, method='any'):
          if metadata_available and self.get_param('ignore_no_formats_error'):
              self.report_warning(msg)
-        raise ExtractorError('%s. %s' % (msg, self._LOGIN_HINTS[method]), expected=True)
+        if method is not None:
+            msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
+        raise ExtractorError(msg, expected=True)
  
      def raise_geo_restricted(
              self, msg='This video is not available from your location due to geo restriction',
  
      def raise_geo_restricted(
              self, msg='This video is not available from your location due to geo restriction',
@@ -1050,17 +1079,20 @@ def raise_geo_restricted(
      def raise_no_formats(self, msg, expected=False, video_id=None):
          if expected and self.get_param('ignore_no_formats_error'):
              self.report_warning(msg, video_id)
      def raise_no_formats(self, msg, expected=False, video_id=None):
          if expected and self.get_param('ignore_no_formats_error'):
              self.report_warning(msg, video_id)
+        elif isinstance(msg, ExtractorError):
+            raise msg
          else:
              raise ExtractorError(msg, expected=expected, video_id=video_id)
  
      # Methods for following #608
      @staticmethod
          else:
              raise ExtractorError(msg, expected=expected, video_id=video_id)
  
      # Methods for following #608
      @staticmethod
-    def url_result(url, ie=None, video_id=None, video_title=None):
+    def url_result(url, ie=None, video_id=None, video_title=None, **kwargs):
          """Returns a URL that points to a page that should be processed"""
          # TODO: ie should be the class used for getting the info
          video_info = {'_type': 'url',
                        'url': url,
                        'ie_key': ie}
          """Returns a URL that points to a page that should be processed"""
          # TODO: ie should be the class used for getting the info
          video_info = {'_type': 'url',
                        'url': url,
                        'ie_key': ie}
+        video_info.update(kwargs)
          if video_id is not None:
              video_info['id'] = video_id
          if video_title is not None:
          if video_id is not None:
              video_info['id'] = video_id
          if video_title is not None:
@@ -1103,15 +1135,14 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
                  if mobj:
                      break
  
                  if mobj:
                      break
  
-        if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
-            _name = '\033[0;34m%s\033[0m' % name
-        else:
-            _name = name
+        _name = self._downloader._color_text(name, 'blue')
  
          if mobj:
              if group is None:
                  # return the first matching group
                  return next(g for g in mobj.groups() if g is not None)
  
          if mobj:
              if group is None:
                  # return the first matching group
                  return next(g for g in mobj.groups() if g is not None)
+            elif isinstance(group, (list, tuple)):
+                return tuple(mobj.group(g) for g in group)
              else:
                  return mobj.group(group)
          elif default is not NO_DEFAULT:
              else:
                  return mobj.group(group)
          elif default is not NO_DEFAULT:
@@ -1139,7 +1170,10 @@ def _get_netrc_login_info(self, netrc_machine=None):
  
          if self.get_param('usenetrc', False):
              try:
  
          if self.get_param('usenetrc', False):
              try:
-                info = netrc.netrc().authenticators(netrc_machine)
+                netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
+                if os.path.isdir(netrc_file):
+                    netrc_file = os.path.join(netrc_file, '.netrc')
+                info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
                  if info is not None:
                      username = info[0]
                      password = info[2]
                  if info is not None:
                      username = info[0]
                      password = info[2]
@@ -1204,8 +1238,7 @@ def _meta_regex(prop):
                      [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
  
      def _og_search_property(self, prop, html, name=None, **kargs):
                      [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
  
      def _og_search_property(self, prop, html, name=None, **kargs):
-        if not isinstance(prop, (list, tuple)):
-            prop = [prop]
+        prop = variadic(prop)
          if name is None:
              name = 'OpenGraph %s' % prop[0]
          og_regexes = []
          if name is None:
              name = 'OpenGraph %s' % prop[0]
          og_regexes = []
@@ -1235,8 +1268,7 @@ def _og_search_url(self, html, **kargs):
          return self._og_search_property('url', html, **kargs)
  
      def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
          return self._og_search_property('url', html, **kargs)
  
      def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
-        if not isinstance(name, (list, tuple)):
-            name = [name]
+        name = variadic(name)
          if display_name is None:
              display_name = name[0]
          return self._html_search_regex(
          if display_name is None:
              display_name = name[0]
          return self._html_search_regex(
@@ -1296,7 +1328,7 @@ def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
          # JSON-LD may be malformed and thus `fatal` should be respected.
          # At the same time `default` may be passed that assumes `fatal=False`
          # for _search_regex. Let's simulate the same behavior here as well.
          # JSON-LD may be malformed and thus `fatal` should be respected.
          # At the same time `default` may be passed that assumes `fatal=False`
          # for _search_regex. Let's simulate the same behavior here as well.
-        fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+        fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
          json_ld = []
          for mobj in json_ld_list:
              json_ld_item = self._parse_json(
          json_ld = []
          for mobj in json_ld_list:
              json_ld_item = self._parse_json(
@@ -1477,7 +1509,7 @@ class FormatSort:
          default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
                     'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
                     'proto', 'ext', 'hasaud', 'source', 'format_id')  # These must not be aliases
          default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
                     'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
                     'proto', 'ext', 'hasaud', 'source', 'format_id')  # These must not be aliases
-        ytdl_default = ('hasaud', 'quality', 'tbr', 'filesize', 'vbr',
+        ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
                          'height', 'width', 'proto', 'vext', 'abr', 'aext',
                          'fps', 'fs_approx', 'source', 'format_id')
  
                          'height', 'width', 'proto', 'vext', 'abr', 'aext',
                          'fps', 'fs_approx', 'source', 'format_id')
  
@@ -1487,7 +1519,7 @@ class FormatSort:
              'acodec': {'type': 'ordered', 'regex': True,
                         'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']},
              'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
              'acodec': {'type': 'ordered', 'regex': True,
                         'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']},
              'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
-                      'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']},
+                      'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']},
              'vext': {'type': 'ordered', 'field': 'video_ext',
                       'order': ('mp4', 'webm', 'flv', '', 'none'),
                       'order_free': ('webm', 'mp4', 'flv', '', 'none')},
              'vext': {'type': 'ordered', 'field': 'video_ext',
                       'order': ('mp4', 'webm', 'flv', '', 'none'),
                       'order_free': ('webm', 'mp4', 'flv', '', 'none')},
@@ -1495,13 +1527,13 @@ class FormatSort:
                       'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
                       'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
              'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
                       'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
                       'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
              'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
-            'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', 'default': 1,
+            'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
                             'field': ('vcodec', 'acodec'),
                             'function': lambda it: int(any(v != 'none' for v in it))},
              'ie_pref': {'priority': True, 'type': 'extractor'},
              'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
              'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
                             'field': ('vcodec', 'acodec'),
                             'function': lambda it: int(any(v != 'none' for v in it))},
              'ie_pref': {'priority': True, 'type': 'extractor'},
              'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
              'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
-            'lang': {'priority': True, 'convert': 'ignore', 'field': 'language_preference'},
+            'lang': {'convert': 'ignore', 'field': 'language_preference'},
              'quality': {'convert': 'float_none', 'default': -1},
              'filesize': {'convert': 'bytes'},
              'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
              'quality': {'convert': 'float_none', 'default': -1},
              'filesize': {'convert': 'bytes'},
              'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
@@ -1519,7 +1551,8 @@ class FormatSort:
              'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
              'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
              'ext': {'type': 'combined', 'field': ('vext', 'aext')},
              'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
              'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
              'ext': {'type': 'combined', 'field': ('vext', 'aext')},
-            'res': {'type': 'multiple', 'field': ('height', 'width'), 'function': min},
+            'res': {'type': 'multiple', 'field': ('height', 'width'),
+                    'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
  
              # Most of these exist only for compatibility reasons
              'dimension': {'type': 'alias', 'field': 'res'},
  
              # Most of these exist only for compatibility reasons
              'dimension': {'type': 'alias', 'field': 'res'},
@@ -1563,7 +1596,7 @@ def _get_field_setting(self, field, key):
                  elif key == 'convert':
                      default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
                  else:
                  elif key == 'convert':
                      default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
                  else:
-                    default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,), 'function': max}.get(key, None)
+                    default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
                  propObj[key] = default
              return propObj[key]
  
                  propObj[key] = default
              return propObj[key]
  
@@ -1646,7 +1679,7 @@ def add_item(field, reverse, closest, limit_text):
                  has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
  
                  fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
                  has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
  
                  fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
-                limits = limit_text.split(":") if has_multiple_limits else (limit_text,) if has_limit else tuple()
+                limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
                  limit_count = len(limits)
                  for (i, f) in enumerate(fields):
                      add_item(f, reverse, closest,
                  limit_count = len(limits)
                  for (i, f) in enumerate(fields):
                      add_item(f, reverse, closest,
@@ -1703,11 +1736,7 @@ def _calculate_field_preference(self, format, field):
                  type = 'field'  # Only 'field' is allowed in multiple for now
                  actual_fields = self._get_field_setting(field, 'field')
  
                  type = 'field'  # Only 'field' is allowed in multiple for now
                  actual_fields = self._get_field_setting(field, 'field')
  
-                def wrapped_function(values):
-                    values = tuple(filter(lambda x: x is not None, values))
-                    return self._get_field_setting(field, 'function')(values) if values else None
-
-                value = wrapped_function((get_value(f) for f in actual_fields))
+                value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
              else:
                  value = get_value(field)
              return self._calculate_field_preference_from_value(format, field, type, value)
              else:
                  value = get_value(field)
              return self._calculate_field_preference_from_value(format, field, type, value)
@@ -1734,18 +1763,16 @@ def calculate_preference(self, format):
                  if format.get('vbr') is not None and format.get('abr') is not None:
                      format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
              else:
                  if format.get('vbr') is not None and format.get('abr') is not None:
                      format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
              else:
-                if format.get('vcodec') != "none" and format.get('vbr') is None:
+                if format.get('vcodec') != 'none' and format.get('vbr') is None:
                      format['vbr'] = format.get('tbr') - format.get('abr', 0)
                      format['vbr'] = format.get('tbr') - format.get('abr', 0)
-                if format.get('acodec') != "none" and format.get('abr') is None:
+                if format.get('acodec') != 'none' and format.get('abr') is None:
                      format['abr'] = format.get('tbr') - format.get('vbr', 0)
  
              return tuple(self._calculate_field_preference(format, field) for field in self._order)
  
      def _sort_formats(self, formats, field_preference=[]):
          if not formats:
                      format['abr'] = format.get('tbr') - format.get('vbr', 0)
  
              return tuple(self._calculate_field_preference(format, field) for field in self._order)
  
      def _sort_formats(self, formats, field_preference=[]):
          if not formats:
-            if self.get_param('ignore_no_formats_error'):
-                return
-            raise ExtractorError('No video formats found')
+            return
          format_sort = self.FormatSort()  # params and to_screen are taken from the downloader
          format_sort.evaluate_params(self._downloader.params, field_preference)
          if self.get_param('verbose', False):
          format_sort = self.FormatSort()  # params and to_screen are taken from the downloader
          format_sort.evaluate_params(self._downloader.params, field_preference)
          if self.get_param('verbose', False):
@@ -1940,13 +1967,16 @@ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m
              'format_note': 'Quality selection URL',
          }
  
              'format_note': 'Quality selection URL',
          }
  
+    def _report_ignoring_subs(self, name):
+        self.report_warning(bug_reports_message(
+            f'Ignoring subtitle tracks found in the {name} manifest; '
+            'if any subtitle tracks are missing,'
+        ), only_once=True)
+
      def _extract_m3u8_formats(self, *args, **kwargs):
          fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
          if subs:
      def _extract_m3u8_formats(self, *args, **kwargs):
          fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
          if subs:
-            self.report_warning(bug_reports_message(
-                "Ignoring subtitle tracks found in the HLS manifest; "
-                "if any subtitle tracks are missing,"
-            ))
+            self._report_ignoring_subs('HLS')
          return fmts
  
      def _extract_m3u8_formats_and_subtitles(
          return fmts
  
      def _extract_m3u8_formats_and_subtitles(
@@ -1978,24 +2008,31 @@ def _parse_m3u8_formats_and_subtitles(
              preference=None, quality=None, m3u8_id=None, live=False, note=None,
              errnote=None, fatal=True, data=None, headers={}, query={},
              video_id=None):
              preference=None, quality=None, m3u8_id=None, live=False, note=None,
              errnote=None, fatal=True, data=None, headers={}, query={},
              video_id=None):
+        formats, subtitles = [], {}
  
          if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
  
          if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
-            return [], {}
-
-        if (not self.get_param('allow_unplayable_formats')
-                and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)):  # Apple FairPlay
-            return [], {}
+            return formats, subtitles
  
  
-        formats = []
+        has_drm = re.search(r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', m3u8_doc)
  
  
-        subtitles = {}
+        def format_url(url):
+            return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
  
  
-        format_url = lambda u: (
-            u
-            if re.match(r'^https?://', u)
-            else compat_urlparse.urljoin(m3u8_url, u))
+        if self.get_param('hls_split_discontinuity', False):
+            def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
+                if not m3u8_doc:
+                    if not manifest_url:
+                        return []
+                    m3u8_doc = self._download_webpage(
+                        manifest_url, video_id, fatal=fatal, data=data, headers=headers,
+                        note=False, errnote='Failed to download m3u8 playlist information')
+                    if m3u8_doc is False:
+                        return []
+                return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
  
  
-        split_discontinuity = self.get_param('hls_split_discontinuity', False)
+        else:
+            def _extract_m3u8_playlist_indices(*args, **kwargs):
+                return [None]
  
          # References:
          # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
  
          # References:
          # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
@@ -2013,68 +2050,17 @@ def _parse_m3u8_formats_and_subtitles(
          # media playlist and MUST NOT appear in master playlist thus we can
          # clearly detect media playlist with this criterion.
  
          # media playlist and MUST NOT appear in master playlist thus we can
          # clearly detect media playlist with this criterion.
  
-        def _extract_m3u8_playlist_formats(format_url=None, m3u8_doc=None, video_id=None,
-                                           fatal=True, data=None, headers={}):
-            if not m3u8_doc:
-                if not format_url:
-                    return []
-                res = self._download_webpage_handle(
-                    format_url, video_id,
-                    note=False,
-                    errnote='Failed to download m3u8 playlist information',
-                    fatal=fatal, data=data, headers=headers)
-
-                if res is False:
-                    return []
-
-                m3u8_doc, urlh = res
-                format_url = urlh.geturl()
-
-            playlist_formats = []
-            i = (
-                0
-                if split_discontinuity
-                else None)
-            format_info = {
-                'index': i,
-                'key_data': None,
-                'files': [],
-            }
-            for line in m3u8_doc.splitlines():
-                if not line.startswith('#'):
-                    format_info['files'].append(line)
-                elif split_discontinuity and line.startswith('#EXT-X-DISCONTINUITY'):
-                    i += 1
-                    playlist_formats.append(format_info)
-                    format_info = {
-                        'index': i,
-                        'url': format_url,
-                        'files': [],
-                    }
-            playlist_formats.append(format_info)
-            return playlist_formats
-
          if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
          if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
-
-            playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc=m3u8_doc)
-
-            for format in playlist_formats:
-                format_id = []
-                if m3u8_id:
-                    format_id.append(m3u8_id)
-                format_index = format.get('index')
-                if format_index:
-                    format_id.append(str(format_index))
-                f = {
-                    'format_id': '-'.join(format_id),
-                    'format_index': format_index,
-                    'url': m3u8_url,
-                    'ext': ext,
-                    'protocol': entry_protocol,
-                    'preference': preference,
-                    'quality': quality,
-                }
-                formats.append(f)
+            formats = [{
+                'format_id': '-'.join(map(str, filter(None, [m3u8_id, idx]))),
+                'format_index': idx,
+                'url': m3u8_url,
+                'ext': ext,
+                'protocol': entry_protocol,
+                'preference': preference,
+                'quality': quality,
+                'has_drm': has_drm,
+            } for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
  
              return formats, subtitles
  
  
              return formats, subtitles
  
@@ -2114,32 +2100,19 @@ def extract_media(x_media_line):
              media_url = media.get('URI')
              if media_url:
                  manifest_url = format_url(media_url)
              media_url = media.get('URI')
              if media_url:
                  manifest_url = format_url(media_url)
-                format_id = []
-                playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
-                                                                  fatal=fatal, data=data, headers=headers)
-
-                for format in playlist_formats:
-                    format_index = format.get('index')
-                    for v in (m3u8_id, group_id, name):
-                        if v:
-                            format_id.append(v)
-                    if format_index:
-                        format_id.append(str(format_index))
-                    f = {
-                        'format_id': '-'.join(format_id),
-                        'format_note': name,
-                        'format_index': format_index,
-                        'url': manifest_url,
-                        'manifest_url': m3u8_url,
-                        'language': media.get('LANGUAGE'),
-                        'ext': ext,
-                        'protocol': entry_protocol,
-                        'preference': preference,
-                        'quality': quality,
-                    }
-                    if media_type == 'AUDIO':
-                        f['vcodec'] = 'none'
-                    formats.append(f)
+                formats.extend({
+                    'format_id': '-'.join(map(str, filter(None, (m3u8_id, group_id, name, idx)))),
+                    'format_note': name,
+                    'format_index': idx,
+                    'url': manifest_url,
+                    'manifest_url': m3u8_url,
+                    'language': media.get('LANGUAGE'),
+                    'ext': ext,
+                    'protocol': entry_protocol,
+                    'preference': preference,
+                    'quality': quality,
+                    'vcodec': 'none' if media_type == 'AUDIO' else None,
+                } for idx in _extract_m3u8_playlist_indices(manifest_url))
  
          def build_stream_name():
              # Despite specification does not mention NAME attribute for
  
          def build_stream_name():
              # Despite specification does not mention NAME attribute for
@@ -2178,25 +2151,17 @@ def build_stream_name():
                      or last_stream_inf.get('BANDWIDTH'), scale=1000)
                  manifest_url = format_url(line.strip())
  
                      or last_stream_inf.get('BANDWIDTH'), scale=1000)
                  manifest_url = format_url(line.strip())
  
-                playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
-                                                                  fatal=fatal, data=data, headers=headers)
-
-                for frmt in playlist_formats:
-                    format_id = []
-                    if m3u8_id:
-                        format_id.append(m3u8_id)
-                    format_index = frmt.get('index')
-                    stream_name = build_stream_name()
+                for idx in _extract_m3u8_playlist_indices(manifest_url):
+                    format_id = [m3u8_id, None, idx]
                      # Bandwidth of live streams may differ over time thus making
                      # format_id unpredictable. So it's better to keep provided
                      # format_id intact.
                      if not live:
                      # Bandwidth of live streams may differ over time thus making
                      # format_id unpredictable. So it's better to keep provided
                      # format_id intact.
                      if not live:
-                        format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
-                    if format_index:
-                        format_id.append(str(format_index))
+                        stream_name = build_stream_name()
+                        format_id[1] = stream_name if stream_name else '%d' % (tbr if tbr else len(formats))
                      f = {
                      f = {
-                        'format_id': '-'.join(format_id),
-                        'format_index': format_index,
+                        'format_id': '-'.join(map(str, filter(None, format_id))),
+                        'format_index': idx,
                          'url': manifest_url,
                          'manifest_url': m3u8_url,
                          'tbr': tbr,
                          'url': manifest_url,
                          'manifest_url': m3u8_url,
                          'tbr': tbr,
@@ -2259,6 +2224,25 @@ def build_stream_name():
                  last_stream_inf = {}
          return formats, subtitles
  
                  last_stream_inf = {}
          return formats, subtitles
  
+    def _extract_m3u8_vod_duration(
+            self, m3u8_vod_url, video_id, note=None, errnote=None, data=None, headers={}, query={}):
+
+        m3u8_vod = self._download_webpage(
+            m3u8_vod_url, video_id,
+            note='Downloading m3u8 VOD manifest' if note is None else note,
+            errnote='Failed to download VOD manifest' if errnote is None else errnote,
+            fatal=False, data=data, headers=headers, query=query)
+
+        return self._parse_m3u8_vod_duration(m3u8_vod or '', video_id)
+
+    def _parse_m3u8_vod_duration(self, m3u8_vod, video_id):
+        if '#EXT-X-PLAYLIST-TYPE:VOD' not in m3u8_vod:
+            return None
+
+        return int(sum(
+            float(line[len('#EXTINF:'):].split(',')[0])
+            for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None
+
      @staticmethod
      def _xpath_ns(path, namespace=None):
          if not namespace:
      @staticmethod
      def _xpath_ns(path, namespace=None):
          if not namespace:
@@ -2271,7 +2255,7 @@ def _xpath_ns(path, namespace=None):
                  out.append('{%s}%s' % (namespace, c))
          return '/'.join(out)
  
                  out.append('{%s}%s' % (namespace, c))
          return '/'.join(out)
  
-    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+    def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
          smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
  
          if smil is False:
          smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
  
          if smil is False:
@@ -2280,8 +2264,18 @@ def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None,
  
          namespace = self._parse_smil_namespace(smil)
  
  
          namespace = self._parse_smil_namespace(smil)
  
-        return self._parse_smil_formats(
+        fmts = self._parse_smil_formats(
              smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
              smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+        subs = self._parse_smil_subtitles(
+            smil, namespace=namespace)
+
+        return fmts, subs
+
+    def _extract_smil_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self._report_ignoring_subs('SMIL')
+        return fmts
  
      def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
          smil = self._download_smil(smil_url, video_id, fatal=fatal)
  
      def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
          smil = self._download_smil(smil_url, video_id, fatal=fatal)
@@ -2350,14 +2344,15 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
          rtmp_count = 0
          http_count = 0
          m3u8_count = 0
          rtmp_count = 0
          http_count = 0
          m3u8_count = 0
+        imgs_count = 0
  
  
-        srcs = []
+        srcs = set()
          media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
          for medium in media:
              src = medium.get('src')
              if not src or src in srcs:
                  continue
          media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
          for medium in media:
              src = medium.get('src')
              if not src or src in srcs:
                  continue
-            srcs.append(src)
+            srcs.add(src)
  
              bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
              filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
  
              bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
              filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
@@ -2431,6 +2426,24 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                      'height': height,
                  })
  
                      'height': height,
                  })
  
+        for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)):
+            src = medium.get('src')
+            if not src or src in srcs:
+                continue
+            srcs.add(src)
+
+            imgs_count += 1
+            formats.append({
+                'format_id': 'imagestream-%d' % (imgs_count),
+                'url': src,
+                'ext': mimetype2ext(medium.get('type')),
+                'acodec': 'none',
+                'vcodec': 'none',
+                'width': int_or_none(medium.get('width')),
+                'height': int_or_none(medium.get('height')),
+                'format_note': 'SMIL storyboards',
+            })
+
          return formats
  
      def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
          return formats
  
      def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
@@ -2503,10 +2516,7 @@ def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
      def _extract_mpd_formats(self, *args, **kwargs):
          fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
          if subs:
      def _extract_mpd_formats(self, *args, **kwargs):
          fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
          if subs:
-            self.report_warning(bug_reports_message(
-                "Ignoring subtitle tracks found in the DASH manifest; "
-                "if any subtitle tracks are missing,"
-            ))
+            self._report_ignoring_subs('DASH')
          return fmts
  
      def _extract_mpd_formats_and_subtitles(
          return fmts
  
      def _extract_mpd_formats_and_subtitles(
@@ -2530,10 +2540,7 @@ def _extract_mpd_formats_and_subtitles(
      def _parse_mpd_formats(self, *args, **kwargs):
          fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
          if subs:
      def _parse_mpd_formats(self, *args, **kwargs):
          fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
          if subs:
-            self.report_warning(bug_reports_message(
-                "Ignoring subtitle tracks found in the DASH manifest; "
-                "if any subtitle tracks are missing,"
-            ))
+            self._report_ignoring_subs('DASH')
          return fmts
  
      def _parse_mpd_formats_and_subtitles(
          return fmts
  
      def _parse_mpd_formats_and_subtitles(
@@ -2615,11 +2622,9 @@ def extract_Initialization(source):
                          extract_Initialization(segment_template)
              return ms_info
  
                          extract_Initialization(segment_template)
              return ms_info
  
-        skip_unplayable = not self.get_param('allow_unplayable_formats')
-
          mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
          mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats = []
-        subtitles = {}
+        formats, subtitles = [], {}
+        stream_numbers = {'audio': 0, 'video': 0}
          for period in mpd_doc.findall(_add_ns('Period')):
              period_duration = parse_duration(period.get('duration')) or mpd_duration
              period_ms_info = extract_multisegment_info(period, {
          for period in mpd_doc.findall(_add_ns('Period')):
              period_duration = parse_duration(period.get('duration')) or mpd_duration
              period_ms_info = extract_multisegment_info(period, {
@@ -2627,236 +2632,245 @@ def extract_Initialization(source):
                  'timescale': 1,
              })
              for adaptation_set in period.findall(_add_ns('AdaptationSet')):
                  'timescale': 1,
              })
              for adaptation_set in period.findall(_add_ns('AdaptationSet')):
-                if skip_unplayable and is_drm_protected(adaptation_set):
-                    continue
                  adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
                  for representation in adaptation_set.findall(_add_ns('Representation')):
                  adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
                  for representation in adaptation_set.findall(_add_ns('Representation')):
-                    if skip_unplayable and is_drm_protected(representation):
-                        continue
                      representation_attrib = adaptation_set.attrib.copy()
                      representation_attrib.update(representation.attrib)
                      # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
                      mime_type = representation_attrib['mimeType']
                      content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
  
                      representation_attrib = adaptation_set.attrib.copy()
                      representation_attrib.update(representation.attrib)
                      # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
                      mime_type = representation_attrib['mimeType']
                      content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
  
-                    if content_type in ('video', 'audio', 'text') or mime_type == 'image/jpeg':
-                        base_url = ''
-                        for element in (representation, adaptation_set, period, mpd_doc):
-                            base_url_e = element.find(_add_ns('BaseURL'))
-                            if base_url_e is not None:
-                                base_url = base_url_e.text + base_url
-                                if re.match(r'^https?://', base_url):
-                                    break
-                        if mpd_base_url and not re.match(r'^https?://', base_url):
-                            if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
-                                mpd_base_url += '/'
-                            base_url = mpd_base_url + base_url
-                        representation_id = representation_attrib.get('id')
-                        lang = representation_attrib.get('lang')
-                        url_el = representation.find(_add_ns('BaseURL'))
-                        filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
-                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
-                        if representation_id is not None:
-                            format_id = representation_id
+                    codecs = representation_attrib.get('codecs', '')
+                    if content_type not in ('video', 'audio', 'text'):
+                        if mime_type == 'image/jpeg':
+                            content_type = mime_type
+                        elif codecs.split('.')[0] == 'stpp':
+                            content_type = 'text'
+                        elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
+                            content_type = 'text'
                          else:
                          else:
-                            format_id = content_type
-                        if mpd_id:
-                            format_id = mpd_id + '-' + format_id
-                        if content_type in ('video', 'audio'):
-                            f = {
-                                'format_id': format_id,
-                                'manifest_url': mpd_url,
-                                'ext': mimetype2ext(mime_type),
-                                'width': int_or_none(representation_attrib.get('width')),
-                                'height': int_or_none(representation_attrib.get('height')),
-                                'tbr': float_or_none(bandwidth, 1000),
-                                'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
-                                'fps': int_or_none(representation_attrib.get('frameRate')),
-                                'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
-                                'format_note': 'DASH %s' % content_type,
-                                'filesize': filesize,
-                                'container': mimetype2ext(mime_type) + '_dash',
-                            }
-                            f.update(parse_codecs(representation_attrib.get('codecs')))
-                        elif content_type == 'text':
-                            f = {
-                                'ext': mimetype2ext(mime_type),
-                                'manifest_url': mpd_url,
-                                'filesize': filesize,
-                            }
-                        elif mime_type == 'image/jpeg':
-                            # See test case in VikiIE
-                            # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
-                            f = {
-                                'format_id': format_id,
-                                'ext': 'mhtml',
-                                'manifest_url': mpd_url,
-                                'format_note': 'DASH storyboards (jpeg)',
-                                'acodec': 'none',
-                                'vcodec': 'none',
-                            }
-                        representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
-
-                        def prepare_template(template_name, identifiers):
-                            tmpl = representation_ms_info[template_name]
-                            # First of, % characters outside $...$ templates
-                            # must be escaped by doubling for proper processing
-                            # by % operator string formatting used further (see
-                            # https://github.com/ytdl-org/youtube-dl/issues/16867).
-                            t = ''
-                            in_template = False
-                            for c in tmpl:
+                            self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+                            continue
+
+                    base_url = ''
+                    for element in (representation, adaptation_set, period, mpd_doc):
+                        base_url_e = element.find(_add_ns('BaseURL'))
+                        if base_url_e is not None:
+                            base_url = base_url_e.text + base_url
+                            if re.match(r'^https?://', base_url):
+                                break
+                    if mpd_base_url and base_url.startswith('/'):
+                        base_url = compat_urlparse.urljoin(mpd_base_url, base_url)
+                    elif mpd_base_url and not re.match(r'^https?://', base_url):
+                        if not mpd_base_url.endswith('/'):
+                            mpd_base_url += '/'
+                        base_url = mpd_base_url + base_url
+                    representation_id = representation_attrib.get('id')
+                    lang = representation_attrib.get('lang')
+                    url_el = representation.find(_add_ns('BaseURL'))
+                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+                    bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+                    if representation_id is not None:
+                        format_id = representation_id
+                    else:
+                        format_id = content_type
+                    if mpd_id:
+                        format_id = mpd_id + '-' + format_id
+                    if content_type in ('video', 'audio'):
+                        f = {
+                            'format_id': format_id,
+                            'manifest_url': mpd_url,
+                            'ext': mimetype2ext(mime_type),
+                            'width': int_or_none(representation_attrib.get('width')),
+                            'height': int_or_none(representation_attrib.get('height')),
+                            'tbr': float_or_none(bandwidth, 1000),
+                            'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+                            'fps': int_or_none(representation_attrib.get('frameRate')),
+                            'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+                            'format_note': 'DASH %s' % content_type,
+                            'filesize': filesize,
+                            'container': mimetype2ext(mime_type) + '_dash',
+                            'manifest_stream_number': stream_numbers[content_type]
+                        }
+                        f.update(parse_codecs(codecs))
+                        stream_numbers[content_type] += 1
+                    elif content_type == 'text':
+                        f = {
+                            'ext': mimetype2ext(mime_type),
+                            'manifest_url': mpd_url,
+                            'filesize': filesize,
+                        }
+                    elif content_type == 'image/jpeg':
+                        # See test case in VikiIE
+                        # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
+                        f = {
+                            'format_id': format_id,
+                            'ext': 'mhtml',
+                            'manifest_url': mpd_url,
+                            'format_note': 'DASH storyboards (jpeg)',
+                            'acodec': 'none',
+                            'vcodec': 'none',
+                        }
+                    if is_drm_protected(adaptation_set) or is_drm_protected(representation):
+                        f['has_drm'] = True
+                    representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+
+                    def prepare_template(template_name, identifiers):
+                        """Turn the DASH template stored under
+                        representation_ms_info[template_name] into a string
+                        usable with the % operator.
+
+                        identifiers is the collection of $...$ identifier names
+                        that are converted to %(...) placeholders."""
+                        tmpl = representation_ms_info[template_name]
+                        # First off, % characters outside $...$ templates
+                        # must be escaped by doubling for proper processing
+                        # by % operator string formatting used further (see
+                        # https://github.com/ytdl-org/youtube-dl/issues/16867).
+                        t = ''
+                        in_template = False
+                        for c in tmpl:
+                            t += c
+                            if c == '$':
+                                in_template = not in_template
+                            elif c == '%' and not in_template:
                                  t += c
                                  t += c
-                                if c == '$':
-                                    in_template = not in_template
-                                elif c == '%' and not in_template:
-                                    t += c
-                            # Next, $...$ templates are translated to their
-                            # %(...) counterparts to be used with % operator
-                            if representation_id is not None:
-                                t = t.replace('$RepresentationID$', representation_id)
-                            t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
-                            t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
-                            t.replace('$$', '$')
-                            return t
-
-                        # @initialization is a regular template like @media one
-                        # so it should be handled just the same way (see
-                        # https://github.com/ytdl-org/youtube-dl/issues/11605)
-                        if 'initialization' in representation_ms_info:
-                            initialization_template = prepare_template(
-                                'initialization',
-                                # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
-                                # $Time$ shall not be included for @initialization thus
-                                # only $Bandwidth$ remains
-                                ('Bandwidth', ))
-                            representation_ms_info['initialization_url'] = initialization_template % {
-                                'Bandwidth': bandwidth,
-                            }
+                        # Next, $...$ templates are translated to their
+                        # %(...) counterparts to be used with % operator
+                        if representation_id is not None:
+                            t = t.replace('$RepresentationID$', representation_id)
+                        t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+                        t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+                        # str.replace returns a new string, so the result has to be
+                        # assigned back for the $$ -> $ unescaping to take effect
+                        t = t.replace('$$', '$')
+                        return t
+
+                    # @initialization is a regular template like @media one
+                    # so it should be handled just the same way (see
+                    # https://github.com/ytdl-org/youtube-dl/issues/11605)
+                    if 'initialization' in representation_ms_info:
+                        initialization_template = prepare_template(
+                            'initialization',
+                            # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+                            # $Time$ shall not be included for @initialization thus
+                            # only $Bandwidth$ remains
+                            ('Bandwidth', ))
+                        representation_ms_info['initialization_url'] = initialization_template % {
+                            'Bandwidth': bandwidth,
+                        }
  
  
-                        def location_key(location):
-                            return 'url' if re.match(r'^https?://', location) else 'path'
-
-                        if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
-
-                            media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
-                            media_location_key = location_key(media_template)
-
-                            # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
-                            # can't be used at the same time
-                            if '%(Number' in media_template and 's' not in representation_ms_info:
-                                segment_duration = None
-                                if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
-                                    segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
-                                    representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
-                                representation_ms_info['fragments'] = [{
-                                    media_location_key: media_template % {
-                                        'Number': segment_number,
-                                        'Bandwidth': bandwidth,
-                                    },
-                                    'duration': segment_duration,
-                                } for segment_number in range(
-                                    representation_ms_info['start_number'],
-                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])]
-                            else:
-                                # $Number*$ or $Time$ in media template with S list available
-                                # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
-                                # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
-                                representation_ms_info['fragments'] = []
-                                segment_time = 0
-                                segment_d = None
-                                segment_number = representation_ms_info['start_number']
-
-                                def add_segment_url():
-                                    segment_url = media_template % {
-                                        'Time': segment_time,
-                                        'Bandwidth': bandwidth,
-                                        'Number': segment_number,
-                                    }
-                                    representation_ms_info['fragments'].append({
-                                        media_location_key: segment_url,
-                                        'duration': float_or_none(segment_d, representation_ms_info['timescale']),
-                                    })
-
-                                for num, s in enumerate(representation_ms_info['s']):
-                                    segment_time = s.get('t') or segment_time
-                                    segment_d = s['d']
+                    def location_key(location):
+                        # Locations starting with http:// or https:// are keyed
+                        # as 'url', anything else as 'path'
+                        return 'url' if re.match(r'^https?://', location) else 'path'
+
+                    if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+                        media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+                        media_location_key = location_key(media_template)
+
+                        # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+                        # can't be used at the same time
+                        if '%(Number' in media_template and 's' not in representation_ms_info:
+                            segment_duration = None
+                            if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+                                segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+                                representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+                            representation_ms_info['fragments'] = [{
+                                media_location_key: media_template % {
+                                    'Number': segment_number,
+                                    'Bandwidth': bandwidth,
+                                },
+                                'duration': segment_duration,
+                            } for segment_number in range(
+                                representation_ms_info['start_number'],
+                                representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+                        else:
+                            # $Number*$ or $Time$ in media template with S list available
+                            # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+                            # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+                            representation_ms_info['fragments'] = []
+                            segment_time = 0
+                            segment_d = None
+                            segment_number = representation_ms_info['start_number']
+
+                            def add_segment_url():
+                                # Appends one fragment to representation_ms_info['fragments'],
+                                # built from the values segment_time, segment_number and
+                                # segment_d hold in the enclosing scope at call time
+                                segment_url = media_template % {
+                                    'Time': segment_time,
+                                    'Bandwidth': bandwidth,
+                                    'Number': segment_number,
+                                }
+                                representation_ms_info['fragments'].append({
+                                    media_location_key: segment_url,
+                                    'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+                                })
+
+                            for num, s in enumerate(representation_ms_info['s']):
+                                segment_time = s.get('t') or segment_time
+                                segment_d = s['d']
+                                add_segment_url()
+                                segment_number += 1
+                                for r in range(s.get('r', 0)):
+                                    segment_time += segment_d
                                      add_segment_url()
                                      segment_number += 1
                                      add_segment_url()
                                      segment_number += 1
-                                    for r in range(s.get('r', 0)):
-                                        segment_time += segment_d
-                                        add_segment_url()
-                                        segment_number += 1
-                                    segment_time += segment_d
-                        elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
-                            # No media template
-                            # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
-                            # or any YouTube dashsegments video
-                            fragments = []
-                            segment_index = 0
-                            timescale = representation_ms_info['timescale']
-                            for s in representation_ms_info['s']:
-                                duration = float_or_none(s['d'], timescale)
-                                for r in range(s.get('r', 0) + 1):
-                                    segment_uri = representation_ms_info['segment_urls'][segment_index]
-                                    fragments.append({
-                                        location_key(segment_uri): segment_uri,
-                                        'duration': duration,
-                                    })
-                                    segment_index += 1
-                            representation_ms_info['fragments'] = fragments
-                        elif 'segment_urls' in representation_ms_info:
-                            # Segment URLs with no SegmentTimeline
-                            # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
-                            # https://github.com/ytdl-org/youtube-dl/pull/14844
-                            fragments = []
-                            segment_duration = float_or_none(
-                                representation_ms_info['segment_duration'],
-                                representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
-                            for segment_url in representation_ms_info['segment_urls']:
-                                fragment = {
-                                    location_key(segment_url): segment_url,
-                                }
-                                if segment_duration:
-                                    fragment['duration'] = segment_duration
-                                fragments.append(fragment)
-                            representation_ms_info['fragments'] = fragments
-                        # If there is a fragments key available then we correctly recognized fragmented media.
-                        # Otherwise we will assume unfragmented media with direct access. Technically, such
-                        # assumption is not necessarily correct since we may simply have no support for
-                        # some forms of fragmented media renditions yet, but for now we'll use this fallback.
-                        if 'fragments' in representation_ms_info:
-                            f.update({
-                                # NB: mpd_url may be empty when MPD manifest is parsed from a string
-                                'url': mpd_url or base_url,
-                                'fragment_base_url': base_url,
-                                'fragments': [],
-                                'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
-                            })
-                            if 'initialization_url' in representation_ms_info:
-                                initialization_url = representation_ms_info['initialization_url']
-                                if not f.get('url'):
-                                    f['url'] = initialization_url
-                                f['fragments'].append({location_key(initialization_url): initialization_url})
-                            f['fragments'].extend(representation_ms_info['fragments'])
-                        else:
-                            # Assuming direct URL to unfragmented media.
-                            f['url'] = base_url
-                        if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
-                            formats.append(f)
-                        elif content_type == 'text':
-                            subtitles.setdefault(lang or 'und', []).append(f)
+                                segment_time += segment_d
+                    elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+                        # No media template
+                        # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+                        # or any YouTube dashsegments video
+                        fragments = []
+                        segment_index = 0
+                        timescale = representation_ms_info['timescale']
+                        for s in representation_ms_info['s']:
+                            duration = float_or_none(s['d'], timescale)
+                            for r in range(s.get('r', 0) + 1):
+                                segment_uri = representation_ms_info['segment_urls'][segment_index]
+                                fragments.append({
+                                    location_key(segment_uri): segment_uri,
+                                    'duration': duration,
+                                })
+                                segment_index += 1
+                        representation_ms_info['fragments'] = fragments
+                    elif 'segment_urls' in representation_ms_info:
+                        # Segment URLs with no SegmentTimeline
+                        # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+                        # https://github.com/ytdl-org/youtube-dl/pull/14844
+                        fragments = []
+                        segment_duration = float_or_none(
+                            representation_ms_info['segment_duration'],
+                            representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+                        for segment_url in representation_ms_info['segment_urls']:
+                            fragment = {
+                                location_key(segment_url): segment_url,
+                            }
+                            if segment_duration:
+                                fragment['duration'] = segment_duration
+                            fragments.append(fragment)
+                        representation_ms_info['fragments'] = fragments
+                    # If there is a fragments key available then we correctly recognized fragmented media.
+                    # Otherwise we will assume unfragmented media with direct access. Technically, such
+                    # assumption is not necessarily correct since we may simply have no support for
+                    # some forms of fragmented media renditions yet, but for now we'll use this fallback.
+                    if 'fragments' in representation_ms_info:
+                        f.update({
+                            # NB: mpd_url may be empty when MPD manifest is parsed from a string
+                            'url': mpd_url or base_url,
+                            'fragment_base_url': base_url,
+                            'fragments': [],
+                            'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
+                        })
+                        if 'initialization_url' in representation_ms_info:
+                            initialization_url = representation_ms_info['initialization_url']
+                            if not f.get('url'):
+                                f['url'] = initialization_url
+                            f['fragments'].append({location_key(initialization_url): initialization_url})
+                        f['fragments'].extend(representation_ms_info['fragments'])
                      else:
                      else:
-                        self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+                        # Assuming direct URL to unfragmented media.
+                        f['url'] = base_url
+                    if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
+                        formats.append(f)
+                    elif content_type == 'text':
+                        subtitles.setdefault(lang or 'und', []).append(f)
+
          return formats, subtitles
  
      def _extract_ism_formats(self, *args, **kwargs):
          fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
          if subs:
          return formats, subtitles
  
      def _extract_ism_formats(self, *args, **kwargs):
+        """Formats-only wrapper around _extract_ism_formats_and_subtitles().
+        Any subtitles found are not returned; this is reported through
+        _report_ignoring_subs('ISM')."""
          fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
          if subs:
-            self.report_warning(bug_reports_message(
-                "Ignoring subtitle tracks found in the ISM manifest; "
-                "if any subtitle tracks are missing,"
-            ))
+            self._report_ignoring_subs('ISM')
          return fmts
  
      def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
          return fmts
  
      def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
@@ -2882,9 +2896,6 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
          """
          if ism_doc.get('IsLive') == 'TRUE':
              return [], {}
          """
          if ism_doc.get('IsLive') == 'TRUE':
              return [], {}
-        if (not self.get_param('allow_unplayable_formats')
-                and ism_doc.find('Protection') is not None):
-            return [], {}
  
          duration = int(ism_doc.attrib['Duration'])
          timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
  
          duration = int(ism_doc.attrib['Duration'])
          timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
@@ -2975,6 +2986,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
                          'acodec': 'none' if stream_type == 'video' else fourcc,
                          'protocol': 'ism',
                          'fragments': fragments,
                          'acodec': 'none' if stream_type == 'video' else fourcc,
                          'protocol': 'ism',
                          'fragments': fragments,
+                        'has_drm': ism_doc.find('Protection') is not None,
                          '_download_params': {
                              'stream_type': stream_type,
                              'duration': duration,
                          '_download_params': {
                              'stream_type': stream_type,
                              'duration': duration,
@@ -3118,10 +3130,7 @@ def _media_formats(src, cur_media_type, type_info={}):
      def _extract_akamai_formats(self, *args, **kwargs):
          fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
          if subs:
      def _extract_akamai_formats(self, *args, **kwargs):
+        """Formats-only wrapper around _extract_akamai_formats_and_subtitles().
+        Any subtitles found are not returned; this is reported through
+        _report_ignoring_subs('akamai')."""
          fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
          if subs:
-            self.report_warning(bug_reports_message(
-                "Ignoring subtitle tracks found in the manifests; "
-                "if any subtitle tracks are missing,"
-            ))
+            self._report_ignoring_subs('akamai')
          return fmts
  
      def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
          return fmts
  
      def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
@@ -3495,6 +3504,32 @@ def extract_subtitles(self, *args, **kwargs):
      def _get_subtitles(self, *args, **kwargs):
          raise NotImplementedError('This method must be implemented by subclasses')
  
      def _get_subtitles(self, *args, **kwargs):
          raise NotImplementedError('This method must be implemented by subclasses')
  
+    def extract_comments(self, *args, **kwargs):
+        """Return a callable that collects the comments produced by
+        _get_comments(), or None when the 'getcomments' param is not set.
+
+        Calling the returned function gives a dict with the keys 'comments'
+        (the list collected so far) and 'comment_count' (its length, or None
+        if the collection was interrupted by the user)."""
+        if not self.get_param('getcomments'):
+            return None
+        generator = self._get_comments(*args, **kwargs)
+
+        def extractor():
+            comments = []
+            try:
+                while True:
+                    comments.append(next(generator))
+            except KeyboardInterrupt:
+                # Keep whatever was collected before the interruption
+                interrupted = True
+                self.to_screen('Interrupted by user')
+            except StopIteration:
+                # The iterator ran to completion
+                interrupted = False
+            comment_count = len(comments)
+            self.to_screen(f'Extracted {comment_count} comments')
+            return {
+                'comments': comments,
+                # A partial count is not reported as the comment count
+                'comment_count': None if interrupted else comment_count
+            }
+        return extractor
+
+    def _get_comments(self, *args, **kwargs):
+        """Must return an iterator of comments; extract_comments() consumes
+        it with next(). This base implementation only raises."""
+        raise NotImplementedError('This method must be implemented by subclasses')
+
      @staticmethod
      def _merge_subtitle_items(subtitle_list1, subtitle_list2):
          """ Merge subtitle items for one language. Items with duplicated URLs
      @staticmethod
      def _merge_subtitle_items(subtitle_list1, subtitle_list2):
          """ Merge subtitle items for one language. Items with duplicated URLs
@@ -3505,16 +3540,8 @@ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
          return ret
  
      @classmethod
          return ret
  
      @classmethod
-    def _merge_subtitles(cls, *dicts, **kwargs):
+    def _merge_subtitles(cls, *dicts, target=None):
          """ Merge subtitle dictionaries, language by language. """
          """ Merge subtitle dictionaries, language by language. """
-
-        target = (lambda target=None: target)(**kwargs)
-        # The above lambda extracts the keyword argument 'target' from kwargs
-        # while ensuring there are no stray ones. When Python 2 support
-        # is dropped, remove it and change the function signature to:
-        #
-        #     def _merge_subtitles(cls, *dicts, target=None):
-
          if target is None:
              target = {}
          for d in dicts:
          if target is None:
              target = {}
          for d in dicts:
@@ -3532,9 +3559,11 @@ def _get_automatic_captions(self, *args, **kwargs):
          raise NotImplementedError('This method must be implemented by subclasses')
  
      def mark_watched(self, *args, **kwargs):
          raise NotImplementedError('This method must be implemented by subclasses')
  
      def mark_watched(self, *args, **kwargs):
-        if (self.get_param('mark_watched', False)
-                and (self._get_login_info()[0] is not None
-                     or self.get_param('cookiefile') is not None)):
+        """Call _mark_watched() with the given arguments, but only when the
+        'mark_watched' param is set and either the first item returned by
+        _get_login_info() is not None or one of the 'cookiefile' /
+        'cookiesfrombrowser' params is set."""
+        if not self.get_param('mark_watched', False):
+            return
+        if (self._get_login_info()[0] is not None
+                or self.get_param('cookiefile')
+                or self.get_param('cookiesfrombrowser')):
              self._mark_watched(*args, **kwargs)
  
      def _mark_watched(self, *args, **kwargs):
              self._mark_watched(*args, **kwargs)
  
      def _mark_watched(self, *args, **kwargs):
@@ -3567,6 +3596,19 @@ def _availability(is_private=None, needs_premium=None, needs_subscription=None,
              else 'public' if all_known
              else None)
  
              else 'public' if all_known
              else None)
  
+    def _configuration_arg(self, key, default=NO_DEFAULT, casesense=False):
+        '''
+        @returns            A list of values for the extractor argument given by "key"
+                            or "default" if no such key is present
+        @param key          Name of the argument; it is looked up with traverse_obj in
+                            the downloader params under the path
+                            ('extractor_args', <lower-cased ie_key()>, key)
+        @param default      The default value to return when the key is not present (default: [])
+        @param casesense    When false, the values are converted to lower case
+        '''
+        val = traverse_obj(
+            self._downloader.params, ('extractor_args', self.ie_key().lower(), key))
+        if val is None:
+            # NO_DEFAULT is the sentinel for "no default given"; it maps to an empty list
+            return [] if default is NO_DEFAULT else default
+        return list(val) if casesense else [x.lower() for x in val]
+
  
  class SearchInfoExtractor(InfoExtractor):
      """
  
  class SearchInfoExtractor(InfoExtractor):
      """
@@ -3604,7 +3646,14 @@ def _real_extract(self, query):
              return self._get_n_results(query, n)
  
      def _get_n_results(self, query, n):
              return self._get_n_results(query, n)
  
      def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
+        """Get a specified number of results for a query.
+        Either this function or _search_results must be overridden by subclasses """
+        # An n of float('inf') becomes a stop of None, i.e. islice places no
+        # upper limit on the results taken from _search_results()
+        return self.playlist_result(
+            itertools.islice(self._search_results(query), 0, None if n == float('inf') else n),
+            query, query)
+
+    def _search_results(self, query):
+        """Returns an iterator of search results for the query.
+        _get_n_results() slices this iterator with itertools.islice.
+        This base implementation only raises."""
          raise NotImplementedError('This method must be implemented by subclasses')
  
      @property
          raise NotImplementedError('This method must be implemented by subclasses')
  
      @property