Add `--extractor-args` to pass extractor-specific arguments
[yt-dlp.git] / yt_dlp / extractor / common.py
index 99695e14f474404c4882b962a916e234aa0f2d96..bb9d8fba5ba9d87e19c9166820d08bf7e800c4a7 100644
@@ -70,6 +70,7 @@
     str_or_none,
     str_to_int,
     strip_or_none,
+    traverse_obj,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -203,6 +204,9 @@ class InfoExtractor(object):
                                  (HTTP or RTMP) download. Boolean.
                     * downloader_options  A dictionary of downloader options as
                                  described in FileDownloader
+                    RTMP formats can also have the additional fields: page_url,
+                    app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
+                    rtmp_protocol, rtmp_real_time
 
     url:            Final video URL.
     ext:            Video filename extension.
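
For illustration, a format dict carrying the RTMP-specific fields listed in the hunk above might look like the following sketch; every value here is a made-up placeholder, not output from a real extractor:

rtmp_format = {
    'format_id': 'rtmp-720p',               # hypothetical id
    'url': 'rtmp://media.example.com/app',  # placeholder URL
    'ext': 'flv',
    'protocol': 'rtmp',
    # the optional RTMP fields described in the docstring above
    'page_url': 'https://example.com/watch/123',
    'app': 'app',
    'play_path': 'stream-720p',
    'tc_url': 'rtmp://media.example.com/app',
    'flash_version': 'WIN 32,0,0,0',
    'rtmp_live': False,
}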
@@ -250,6 +254,8 @@ class InfoExtractor(object):
                     entry and one of:
                         * "data": The subtitles file contents
                         * "url": A URL pointing to the subtitles file
+                    It can optionally also have:
+                        * "name": Name or description of the subtitles
                     "ext" will be calculated from URL if missing
     automatic_captions: Like 'subtitles'; contains automatically generated
                     captions instead of normal subtitles
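
As a small example of the new optional "name" key, a subtitles entry could look like this sketch (URL and label are placeholders):

subtitles = {
    'en': [{
        'url': 'https://example.com/subs/en.vtt',  # placeholder URL
        'ext': 'vtt',
        'name': 'English (CC)',  # optional human-readable description
    }],
}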
@@ -285,6 +291,7 @@ class InfoExtractor(object):
     categories:     A list of categories that the video falls in, for example
                     ["Sports", "Berlin"]
     tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+    cast:           A list of the video cast
     is_live:        True, False, or None (=unknown). Whether this video is a
                     live stream that goes on instead of a fixed-length video.
     was_live:       True, False, or None (=unknown). Whether this video was
@@ -420,6 +427,14 @@ class InfoExtractor(object):
     _GEO_IP_BLOCKS = None
     _WORKING = True
 
+    _LOGIN_HINTS = {
+        'any': 'Use --cookies, --username and --password or --netrc to provide account credentials',
+        'cookies': (
+            'Use --cookies for authentication. '
+            'See  https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl  for how to pass cookies'),
+        'password': 'Use --username and --password or --netrc to provide account credentials',
+    }
+
     def __init__(self, downloader=None):
         """Constructor. Receives an optional downloader."""
         self._ready = False
@@ -489,7 +504,7 @@ def _initialize_geo_bypass(self, geo_bypass_context):
         if not self._x_forwarded_for_ip:
 
             # Geo bypass mechanism is explicitly disabled by user
-            if not self._downloader.params.get('geo_bypass', True):
+            if not self.get_param('geo_bypass', True):
                 return
 
             if not geo_bypass_context:
@@ -511,7 +526,7 @@ def _initialize_geo_bypass(self, geo_bypass_context):
 
             # Explicit IP block specified by user, use it right away
             # regardless of whether extractor is geo bypassable or not
-            ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+            ip_block = self.get_param('geo_bypass_ip_block', None)
 
             # Otherwise use random IP block from geo bypass context but only
             # if extractor is known as geo bypassable
@@ -522,17 +537,15 @@ def _initialize_geo_bypass(self, geo_bypass_context):
 
             if ip_block:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
-                if self._downloader.params.get('verbose', False):
-                    self._downloader.to_screen(
-                        '[debug] Using fake IP %s as X-Forwarded-For.'
-                        % self._x_forwarded_for_ip)
+                self._downloader.write_debug(
+                    'Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip)
                 return
 
             # Path 2: bypassing based on country code
 
             # Explicit country code specified by user, use it right away
             # regardless of whether extractor is geo bypassable or not
-            country = self._downloader.params.get('geo_bypass_country', None)
+            country = self.get_param('geo_bypass_country', None)
 
             # Otherwise use random country code from geo bypass context but
             # only if extractor is known as geo bypassable
@@ -543,10 +556,8 @@ def _initialize_geo_bypass(self, geo_bypass_context):
 
             if country:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
-                if self._downloader.params.get('verbose', False):
-                    self._downloader.to_screen(
-                        '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
-                        % (self._x_forwarded_for_ip, country.upper()))
+                self._downloader.write_debug(
+                    'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper()))
 
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
@@ -554,12 +565,15 @@ def extract(self, url):
             for _ in range(2):
                 try:
                     self.initialize()
+                    self.write_debug('Extracting URL: %s' % url)
                     ie_result = self._real_extract(url)
+                    if ie_result is None:
+                        return None
                     if self._x_forwarded_for_ip:
                         ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
                     subtitles = ie_result.get('subtitles')
                     if (subtitles and 'live_chat' in subtitles
-                            and 'no-live-chat' in self._downloader.params.get('compat_opts')):
+                            and 'no-live-chat' in self.get_param('compat_opts', [])):
                         del subtitles['live_chat']
                     return ie_result
                 except GeoRestrictedError as e:
@@ -574,9 +588,9 @@ def extract(self, url):
             raise ExtractorError('An extractor error has occurred.', cause=e)
 
     def __maybe_fake_ip_and_retry(self, countries):
-        if (not self._downloader.params.get('geo_bypass_country', None)
+        if (not self.get_param('geo_bypass_country', None)
                 and self._GEO_BYPASS
-                and self._downloader.params.get('geo_bypass', True)
+                and self.get_param('geo_bypass', True)
                 and not self._x_forwarded_for_ip
                 and countries):
             country_code = random.choice(countries)
@@ -630,7 +644,7 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
         See _download_webpage docstring for arguments specification.
         """
         if not self._downloader._first_webpage_request:
-            sleep_interval = float_or_none(self._downloader.params.get('sleep_interval_requests')) or 0
+            sleep_interval = float_or_none(self.get_param('sleep_interval_requests')) or 0
             if sleep_interval > 0:
                 self.to_screen('Sleeping %s seconds ...' % sleep_interval)
                 time.sleep(sleep_interval)
@@ -755,11 +769,11 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
             webpage_bytes = prefix + webpage_bytes
         if not encoding:
             encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
-        if self._downloader.params.get('dump_intermediate_pages', False):
+        if self.get_param('dump_intermediate_pages', False):
             self.to_screen('Dumping request to ' + urlh.geturl())
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
-        if self._downloader.params.get('write_pages', False):
+        if self.get_param('write_pages', False):
             basen = '%s_%s' % (video_id, urlh.geturl())
             if len(basen) > 240:
                 h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@@ -943,14 +957,65 @@ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
             else:
                 self.report_warning(errmsg + str(ve))
 
-    def report_warning(self, msg, video_id=None):
+    def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True):
+        return self._parse_json(
+            data[data.find('{'):data.rfind('}') + 1],
+            video_id, transform_source, fatal)
+
+    def _download_socket_json_handle(
+            self, url_or_request, video_id, note='Polling socket',
+            errnote='Unable to poll socket', transform_source=None,
+            fatal=True, encoding=None, data=None, headers={}, query={},
+            expected_status=None):
+        """
+        Return a tuple (JSON object, URL handle).
+
+        See _download_webpage docstring for arguments specification.
+        """
+        res = self._download_webpage_handle(
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding, data=data, headers=headers, query=query,
+            expected_status=expected_status)
+        if res is False:
+            return res
+        webpage, urlh = res
+        return self._parse_socket_response_as_json(
+            webpage, video_id, transform_source=transform_source,
+            fatal=fatal), urlh
+
+    def _download_socket_json(
+            self, url_or_request, video_id, note='Polling socket',
+            errnote='Unable to poll socket', transform_source=None,
+            fatal=True, encoding=None, data=None, headers={}, query={},
+            expected_status=None):
+        """
+        Return the JSON object as a dict.
+
+        See _download_webpage docstring for arguments specification.
+        """
+        res = self._download_socket_json_handle(
+            url_or_request, video_id, note=note, errnote=errnote,
+            transform_source=transform_source, fatal=fatal, encoding=encoding,
+            data=data, headers=headers, query=query,
+            expected_status=expected_status)
+        return res if res is False else res[0]
+
+    def report_warning(self, msg, video_id=None, *args, **kwargs):
         idstr = '' if video_id is None else '%s: ' % video_id
         self._downloader.report_warning(
-            '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+            '[%s] %s%s' % (self.IE_NAME, idstr, msg), *args, **kwargs)
 
-    def to_screen(self, msg):
+    def to_screen(self, msg, *args, **kwargs):
         """Print msg to screen, prefixing it with '[ie_name]'"""
-        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+
+    def write_debug(self, msg, *args, **kwargs):
+        self._downloader.write_debug('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+
+    def get_param(self, name, default=None, *args, **kwargs):
+        if self._downloader:
+            return self._downloader.params.get(name, default, *args, **kwargs)
+        return default
 
     def report_extraction(self, id_or_name):
         """Report information extraction."""
@@ -969,23 +1034,22 @@ def report_login(self):
         self.to_screen('Logging in')
 
     def raise_login_required(
-            self, msg='This video is only available for registered users', metadata_available=False):
-        if metadata_available and self._downloader.params.get('ignore_no_formats_error'):
+            self, msg='This video is only available for registered users',
+            metadata_available=False, method='any'):
+        if metadata_available and self.get_param('ignore_no_formats_error'):
             self.report_warning(msg)
-        raise ExtractorError(
-            '%s. Use --cookies, --username and --password or --netrc to provide account credentials' % msg,
-            expected=True)
+        raise ExtractorError('%s. %s' % (msg, self._LOGIN_HINTS[method]), expected=True)
 
     def raise_geo_restricted(
             self, msg='This video is not available from your location due to geo restriction',
             countries=None, metadata_available=False):
-        if metadata_available and self._downloader.params.get('ignore_no_formats_error'):
+        if metadata_available and self.get_param('ignore_no_formats_error'):
             self.report_warning(msg)
         else:
             raise GeoRestrictedError(msg, countries=countries)
 
     def raise_no_formats(self, msg, expected=False, video_id=None):
-        if expected and self._downloader.params.get('ignore_no_formats_error'):
+        if expected and self.get_param('ignore_no_formats_error'):
             self.report_warning(msg, video_id)
         else:
             raise ExtractorError(msg, expected=expected, video_id=video_id)
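
To show how the new method argument picks a hint, here is a hedged sketch that reproduces the message composition of raise_login_required outside of yt-dlp; the error text is illustrative and the hint strings are abbreviated:

LOGIN_HINTS = {
    'any': 'Use --cookies, --username and --password or --netrc to provide account credentials',
    'cookies': 'Use --cookies for authentication',
    'password': 'Use --username and --password or --netrc to provide account credentials',
}


def login_error(msg, method='any'):
    # mirrors raise_login_required: append the hint matching the requested method
    return '%s. %s' % (msg, LOGIN_HINTS[method])


print(login_error('This video is only available for registered users', method='cookies'))
# -> This video is only available for registered users. Use --cookies for authentication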
@@ -1040,7 +1104,7 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
                 if mobj:
                     break
 
-        if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
+        if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
             _name = '\033[0;34m%s\033[0m' % name
         else:
             _name = name
@@ -1074,7 +1138,7 @@ def _get_netrc_login_info(self, netrc_machine=None):
         password = None
         netrc_machine = netrc_machine or self._NETRC_MACHINE
 
-        if self._downloader.params.get('usenetrc', False):
+        if self.get_param('usenetrc', False):
             try:
                 info = netrc.netrc().authenticators(netrc_machine)
                 if info is not None:
@@ -1098,15 +1162,11 @@ def _get_login_info(self, username_option='username', password_option='password'
         value.
         If there's no info available, return (None, None)
         """
-        if self._downloader is None:
-            return (None, None)
-
-        downloader_params = self._downloader.params
 
         # Attempt to use provided username and password or .netrc data
-        if downloader_params.get(username_option) is not None:
-            username = downloader_params[username_option]
-            password = downloader_params[password_option]
+        username = self.get_param(username_option)
+        if username is not None:
+            password = self.get_param(password_option)
         else:
             username, password = self._get_netrc_login_info(netrc_machine)
 
@@ -1119,12 +1179,10 @@ def _get_tfa_info(self, note='two-factor verification code'):
         currently just uses the command line option
         If there's no info available, return None
         """
-        if self._downloader is None:
-            return None
-        downloader_params = self._downloader.params
 
-        if downloader_params.get('twofactor') is not None:
-            return downloader_params['twofactor']
+        tfa = self.get_param('twofactor')
+        if tfa is not None:
+            return tfa
 
         return compat_getpass('Type %s and press [Return]: ' % note)
 
@@ -1417,7 +1475,7 @@ def _form_hidden_inputs(self, form_id, html):
     class FormatSort:
         regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
 
-        default = ('hidden', 'hasvid', 'ie_pref', 'lang', 'quality',
+        default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
                    'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
                    'proto', 'ext', 'hasaud', 'source', 'format_id')  # These must not be aliases
         ytdl_default = ('hasaud', 'quality', 'tbr', 'filesize', 'vbr',
@@ -1430,7 +1488,7 @@ class FormatSort:
             'acodec': {'type': 'ordered', 'regex': True,
                        'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']},
             'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
-                      'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']},
+                      'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']},
             'vext': {'type': 'ordered', 'field': 'video_ext',
                      'order': ('mp4', 'webm', 'flv', '', 'none'),
                      'order_free': ('webm', 'mp4', 'flv', '', 'none')},
@@ -1438,6 +1496,9 @@ class FormatSort:
                      'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
                      'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
             'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
+            'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', 'default': 1,
+                           'field': ('vcodec', 'acodec'),
+                           'function': lambda it: int(any(v != 'none' for v in it))},
             'ie_pref': {'priority': True, 'type': 'extractor'},
             'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
             'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
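
To see what the new aud_or_vid field computes: the 'multiple' type passes the format's (vcodec, acodec) values, with None entries filtered out, to the function, which returns 1 for any format that has at least one real stream and 0 otherwise, so stream-less entries such as storyboards sort last. A standalone sketch:

def aud_or_vid(values):
    # same logic as the lambda registered for the 'aud_or_vid' field above
    return int(any(v != 'none' for v in values))


print(aud_or_vid(('vp9', 'none')))    # 1: video-only format
print(aud_or_vid(('none', 'opus')))   # 1: audio-only format
print(aud_or_vid(('none', 'none')))   # 0: no real stream (e.g. storyboards)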
@@ -1594,12 +1655,12 @@ def add_item(field, reverse, closest, limit_text):
                              else limits[0] if has_limit and not has_multiple_limits
                              else None)
 
-        def print_verbose_info(self, to_screen):
+        def print_verbose_info(self, write_debug):
             if self._sort_user:
-                to_screen('[debug] Sort order given by user: %s' % ', '.join(self._sort_user))
+                write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
             if self._sort_extractor:
-                to_screen('[debug] Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
-            to_screen('[debug] Formats sorted by: %s' % ', '.join(['%s%s%s' % (
+                write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
+            write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
                 '+' if self._get_field_setting(field, 'reverse') else '', field,
                 '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
                               self._get_field_setting(field, 'limit_text'),
@@ -1645,9 +1706,7 @@ def _calculate_field_preference(self, format, field):
 
                 def wrapped_function(values):
                     values = tuple(filter(lambda x: x is not None, values))
-                    return (self._get_field_setting(field, 'function')(*values) if len(values) > 1
-                            else values[0] if values
-                            else None)
+                    return self._get_field_setting(field, 'function')(values) if values else None
 
                 value = wrapped_function((get_value(f) for f in actual_fields))
             else:
@@ -1663,7 +1722,7 @@ def calculate_preference(self, format):
             if not format.get('ext') and 'url' in format:
                 format['ext'] = determine_ext(format['url'])
             if format.get('vcodec') == 'none':
-                format['audio_ext'] = format['ext']
+                format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
                 format['video_ext'] = 'none'
             else:
                 format['video_ext'] = format['ext']
@@ -1685,13 +1744,13 @@ def calculate_preference(self, format):
 
     def _sort_formats(self, formats, field_preference=[]):
         if not formats:
-            if self._downloader.params.get('ignore_no_formats_error'):
+            if self.get_param('ignore_no_formats_error'):
                 return
             raise ExtractorError('No video formats found')
         format_sort = self.FormatSort()  # params and to_screen are taken from the downloader
         format_sort.evaluate_params(self._downloader.params, field_preference)
-        if self._downloader.params.get('verbose', False):
-            format_sort.print_verbose_info(self._downloader.to_screen)
+        if self.get_param('verbose', False):
+            format_sort.print_verbose_info(self._downloader.write_debug)
         formats.sort(key=lambda f: format_sort.calculate_preference(f))
 
     def _check_formats(self, formats, video_id):
@@ -1730,7 +1789,7 @@ def http_scheme(self):
         """ Either "http:" or "https:", depending on the user's preferences """
         return (
             'http:'
-            if self._downloader.params.get('prefer_insecure', False)
+            if self.get_param('prefer_insecure', False)
             else 'https:')
 
     def _proto_relative_url(self, url, scheme=None):
@@ -1892,15 +1951,15 @@ def _extract_m3u8_formats(self, *args, **kwargs):
         return fmts
 
     def _extract_m3u8_formats_and_subtitles(
-            self, m3u8_url, video_id, ext=None, entry_protocol='m3u8',
+            self, m3u8_url, video_id, ext=None, entry_protocol='m3u8_native',
             preference=None, quality=None, m3u8_id=None, note=None,
             errnote=None, fatal=True, live=False, data=None, headers={},
             query={}):
 
         res = self._download_webpage_handle(
             m3u8_url, video_id,
-            note=note or 'Downloading m3u8 information',
-            errnote=errnote or 'Failed to download m3u8 information',
+            note='Downloading m3u8 information' if note is None else note,
+            errnote='Failed to download m3u8 information' if errnote is None else errnote,
             fatal=fatal, data=data, headers=headers, query=query)
 
         if res is False:
@@ -1916,7 +1975,7 @@ def _extract_m3u8_formats_and_subtitles(
             headers=headers, query=query, video_id=video_id)
 
     def _parse_m3u8_formats_and_subtitles(
-            self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8',
+            self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8_native',
             preference=None, quality=None, m3u8_id=None, live=False, note=None,
             errnote=None, fatal=True, data=None, headers={}, query={},
             video_id=None):
@@ -1924,7 +1983,7 @@ def _parse_m3u8_formats_and_subtitles(
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return [], {}
 
-        if (not self._downloader.params.get('allow_unplayable_formats')
+        if (not self.get_param('allow_unplayable_formats')
                 and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)):  # Apple FairPlay
             return [], {}
 
@@ -1937,7 +1996,7 @@ def _parse_m3u8_formats_and_subtitles(
             if re.match(r'^https?://', u)
             else compat_urlparse.urljoin(m3u8_url, u))
 
-        split_discontinuity = self._downloader.params.get('hls_split_discontinuity', False)
+        split_discontinuity = self.get_param('hls_split_discontinuity', False)
 
         # References:
         # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
@@ -2032,7 +2091,12 @@ def extract_media(x_media_line):
             groups.setdefault(group_id, []).append(media)
             # <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
             if media_type == 'SUBTITLES':
-                lang = media['LANGUAGE']  # XXX: normalise?
+                # According to RFC 8216 §4.3.4.2.1, URI is REQUIRED in the
+                # EXT-X-MEDIA tag if the media type is SUBTITLES.
+                # However, lack of URI has been spotted in the wild.
+                # e.g. NebulaIE; see https://github.com/yt-dlp/yt-dlp/issues/339
+                if not media.get('URI'):
+                    return
                 url = format_url(media['URI'])
                 sub_info = {
                     'url': url,
@@ -2044,6 +2108,7 @@ def extract_media(x_media_line):
                     # <https://tools.ietf.org/html/rfc8216#section-3.1>
                     sub_info['ext'] = 'vtt'
                     sub_info['protocol'] = 'm3u8_native'
+                lang = media.get('LANGUAGE') or 'und'
                 subtitles.setdefault(lang, []).append(sub_info)
             if media_type not in ('VIDEO', 'AUDIO'):
                 return
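
A minimal sketch of the guard and language fallback added in the two hunks above, with hand-written dicts standing in for parsed EXT-X-MEDIA attributes: a SUBTITLES rendition without URI is skipped, and a missing LANGUAGE falls back to 'und':

def collect_subtitle(subtitles, media):
    if media.get('TYPE') != 'SUBTITLES':
        return
    if not media.get('URI'):  # spotted in the wild despite RFC 8216 requiring it
        return
    lang = media.get('LANGUAGE') or 'und'
    subtitles.setdefault(lang, []).append({'url': media['URI']})


subs = {}
collect_subtitle(subs, {'TYPE': 'SUBTITLES', 'URI': 'https://example.com/en.vtt', 'LANGUAGE': 'en'})
collect_subtitle(subs, {'TYPE': 'SUBTITLES'})  # no URI: skipped
collect_subtitle(subs, {'TYPE': 'SUBTITLES', 'URI': 'https://example.com/x.vtt'})  # no LANGUAGE: 'und'
print(subs)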
@@ -2063,6 +2128,7 @@ def extract_media(x_media_line):
                         format_id.append(str(format_index))
                     f = {
                         'format_id': '-'.join(format_id),
+                        'format_note': name,
                         'format_index': format_index,
                         'url': manifest_url,
                         'manifest_url': m3u8_url,
@@ -2449,8 +2515,8 @@ def _extract_mpd_formats_and_subtitles(
             fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(
             mpd_url, video_id,
-            note=note or 'Downloading MPD manifest',
-            errnote=errnote or 'Failed to download MPD manifest',
+            note='Downloading MPD manifest' if note is None else note,
+            errnote='Failed to download MPD manifest' if errnote is None else errnote,
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
             return [], {}
@@ -2480,7 +2546,7 @@ def _parse_mpd_formats_and_subtitles(
             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
         """
-        if not self._downloader.params.get('dynamic_mpd', True):
+        if not self.get_param('dynamic_mpd', True):
             if mpd_doc.get('type') == 'dynamic':
                 return [], {}
 
@@ -2550,7 +2616,7 @@ def extract_Initialization(source):
                         extract_Initialization(segment_template)
             return ms_info
 
-        skip_unplayable = not self._downloader.params.get('allow_unplayable_formats')
+        skip_unplayable = not self.get_param('allow_unplayable_formats')
 
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
         formats = []
@@ -2574,7 +2640,7 @@ def extract_Initialization(source):
                     mime_type = representation_attrib['mimeType']
                     content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
 
-                    if content_type in ('video', 'audio', 'text'):
+                    if content_type in ('video', 'audio', 'text') or mime_type == 'image/jpeg':
                         base_url = ''
                         for element in (representation, adaptation_set, period, mpd_doc):
                             base_url_e = element.find(_add_ns('BaseURL'))
@@ -2591,9 +2657,15 @@ def extract_Initialization(source):
                         url_el = representation.find(_add_ns('BaseURL'))
                         filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
                         bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+                        if representation_id is not None:
+                            format_id = representation_id
+                        else:
+                            format_id = content_type
+                        if mpd_id:
+                            format_id = mpd_id + '-' + format_id
                         if content_type in ('video', 'audio'):
                             f = {
-                                'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
+                                'format_id': format_id,
                                 'manifest_url': mpd_url,
                                 'ext': mimetype2ext(mime_type),
                                 'width': int_or_none(representation_attrib.get('width')),
@@ -2613,6 +2685,17 @@ def extract_Initialization(source):
                                 'manifest_url': mpd_url,
                                 'filesize': filesize,
                             }
+                        elif mime_type == 'image/jpeg':
+                            # See test case in VikiIE
+                            # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
+                            f = {
+                                'format_id': format_id,
+                                'ext': 'mhtml',
+                                'manifest_url': mpd_url,
+                                'format_note': 'DASH storyboards (jpeg)',
+                                'acodec': 'none',
+                                'vcodec': 'none',
+                            }
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
 
                         def prepare_template(template_name, identifiers):
@@ -2631,7 +2714,8 @@ def prepare_template(template_name, identifiers):
                                     t += c
                             # Next, $...$ templates are translated to their
                             # %(...) counterparts to be used with % operator
-                            t = t.replace('$RepresentationID$', representation_id)
+                            if representation_id is not None:
+                                t = t.replace('$RepresentationID$', representation_id)
                             t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
                             t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
                             t.replace('$$', '$')
@@ -2748,7 +2832,7 @@ def add_segment_url():
                                 'url': mpd_url or base_url,
                                 'fragment_base_url': base_url,
                                 'fragments': [],
-                                'protocol': 'http_dash_segments',
+                                'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
                             })
                             if 'initialization_url' in representation_ms_info:
                                 initialization_url = representation_ms_info['initialization_url']
@@ -2759,7 +2843,7 @@ def add_segment_url():
                         else:
                             # Assuming direct URL to unfragmented media.
                             f['url'] = base_url
-                        if content_type in ('video', 'audio'):
+                        if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
                             formats.append(f)
                         elif content_type == 'text':
                             subtitles.setdefault(lang or 'und', []).append(f)
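
Putting the image/jpeg changes together, a DASH storyboard representation now yields a format dict roughly like the sketch below; the concrete values are illustrative placeholders, not parser output:

storyboard_format = {
    'format_id': 'dash-thumbnails',                       # hypothetical id
    'ext': 'mhtml',
    'manifest_url': 'https://example.com/manifest.mpd',   # placeholder URL
    'format_note': 'DASH storyboards (jpeg)',
    'acodec': 'none',
    'vcodec': 'none',
    'protocol': 'mhtml',        # instead of http_dash_segments
    'fragment_base_url': 'https://example.com/storyboards/',
    'fragments': [],            # filled from the segment list
}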
@@ -2779,8 +2863,8 @@ def _extract_ism_formats(self, *args, **kwargs):
     def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(
             ism_url, video_id,
-            note=note or 'Downloading ISM manifest',
-            errnote=errnote or 'Failed to download ISM manifest',
+            note='Downloading ISM manifest' if note is None else note,
+            errnote='Failed to download ISM manifest' if errnote is None else errnote,
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
             return [], {}
@@ -2799,7 +2883,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
         """
         if ism_doc.get('IsLive') == 'TRUE':
             return [], {}
-        if (not self._downloader.params.get('allow_unplayable_formats')
+        if (not self.get_param('allow_unplayable_formats')
                 and ism_doc.find('Protection') is not None):
             return [], {}
 
@@ -2817,7 +2901,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
             stream_name = stream.get('Name')
             stream_language = stream.get('Language', 'und')
             for track in stream.findall('QualityLevel'):
-                fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
+                fourcc = track.get('FourCC') or ('AACL' if track.get('AudioTag') == '255' else None)
                 # TODO: add support for WVC1 and WMAP
                 if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
                     self.report_warning('%s is not a supported codec' % fourcc)
@@ -3404,8 +3488,8 @@ def is_suitable(self, age_limit):
         return not any_restricted
 
     def extract_subtitles(self, *args, **kwargs):
-        if (self._downloader.params.get('writesubtitles', False)
-                or self._downloader.params.get('listsubtitles')):
+        if (self.get_param('writesubtitles', False)
+                or self.get_param('listsubtitles')):
             return self._get_subtitles(*args, **kwargs)
         return {}
 
@@ -3440,8 +3524,8 @@ def _merge_subtitles(cls, *dicts, **kwargs):
         return target
 
     def extract_automatic_captions(self, *args, **kwargs):
-        if (self._downloader.params.get('writeautomaticsub', False)
-                or self._downloader.params.get('listsubtitles')):
+        if (self.get_param('writeautomaticsub', False)
+                or self.get_param('listsubtitles')):
             return self._get_automatic_captions(*args, **kwargs)
         return {}
 
@@ -3449,9 +3533,9 @@ def _get_automatic_captions(self, *args, **kwargs):
         raise NotImplementedError('This method must be implemented by subclasses')
 
     def mark_watched(self, *args, **kwargs):
-        if (self._downloader.params.get('mark_watched', False)
+        if (self.get_param('mark_watched', False)
                 and (self._get_login_info()[0] is not None
-                     or self._downloader.params.get('cookiefile') is not None)):
+                     or self.get_param('cookiefile') is not None)):
             self._mark_watched(*args, **kwargs)
 
     def _mark_watched(self, *args, **kwargs):
@@ -3459,7 +3543,7 @@ def _mark_watched(self, *args, **kwargs):
 
     def geo_verification_headers(self):
         headers = {}
-        geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+        geo_verification_proxy = self.get_param('geo_verification_proxy')
         if geo_verification_proxy:
             headers['Ytdl-request-proxy'] = geo_verification_proxy
         return headers
@@ -3471,7 +3555,7 @@ def _generic_title(self, url):
         return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
 
     @staticmethod
-    def _availability(is_private, needs_premium, needs_subscription, needs_auth, is_unlisted):
+    def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
         all_known = all(map(
             lambda x: x is not None,
             (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
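
With every flag now defaulting to None, callers can pass only what they actually know; anything short of full knowledge of a public video yields None instead of 'public'. A standalone sketch of the same decision chain (not the real static method):

def availability(is_private=None, needs_premium=None, needs_subscription=None,
                 needs_auth=None, is_unlisted=None):
    all_known = all(x is not None for x in (
        is_private, needs_premium, needs_subscription, needs_auth, is_unlisted))
    return (
        'private' if is_private
        else 'premium_only' if needs_premium
        else 'subscriber_only' if needs_subscription
        else 'needs_auth' if needs_auth
        else 'unlisted' if is_unlisted
        else 'public' if all_known
        else None)


print(availability(needs_auth=True))                       # 'needs_auth'
print(availability(is_private=False, is_unlisted=False))   # None: not everything is known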
@@ -3484,6 +3568,10 @@ def _availability(is_private, needs_premium, needs_subscription, needs_auth, is_
             else 'public' if all_known
             else None)
 
+    def _configuration_arg(self, key):
+        return traverse_obj(
+            self._downloader.params, ('extractor_args', self.ie_key().lower(), key))
+
 
 class SearchInfoExtractor(InfoExtractor):
     """