[generic] Extract previously missed subtitles (#515)

author Felix S <redacted>

Fri, 16 Jul 2021 14:22:56 +0000 (16:22 +0200)

committer GitHub <redacted>

Fri, 16 Jul 2021 14:22:56 +0000 (19:52 +0530)
author Felix S <redacted>
Fri, 16 Jul 2021 14:22:56 +0000 (16:22 +0200)
committer GitHub <redacted>
Fri, 16 Jul 2021 14:22:56 +0000 (19:52 +0530)
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 81b88e4fa9d59ba3b37968ed729299b9ef5fdbe9..0ee7ee3b125e5e6eb529fa2babc557bd2af46230 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2206,7 +2206,7 @@ def _xpath_ns(path, namespace=None):
                  out.append('{%s}%s' % (namespace, c))
          return '/'.join(out)
  
-    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+    def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
          smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
  
          if smil is False:
@@ -2215,8 +2215,21 @@ def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None,
  
          namespace = self._parse_smil_namespace(smil)
  
-        return self._parse_smil_formats(
+        fmts = self._parse_smil_formats(
              smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+        subs = self._parse_smil_subtitles(
+            smil, namespace=namespace)
+
+        return fmts, subs
+
+    def _extract_smil_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self.report_warning(bug_reports_message(
+                "Ignoring subtitle tracks found in the SMIL manifest; "
+                "if any subtitle tracks are missing,"
+            ))
+        return fmts
  
      def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
          smil = self._download_smil(smil_url, video_id, fatal=fatal)
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py

index e53a35008a98b05ce70e551a20389d281aa701ec..7e0598e58edbd2b0c5a20b249d1bbdb78844cae7 100644 (file)
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2462,7 +2462,7 @@ def _real_extract(self, url):
  
          # Is it an M3U playlist?
          if first_bytes.startswith(b'#EXTM3U'):
-            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+            info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
              self._sort_formats(info_dict['formats'])
              return info_dict
  
@@ -3410,6 +3410,7 @@ def _real_extract(self, url):
              if not isinstance(sources, list):
                  sources = [sources]
              formats = []
+            subtitles = {}
              for source in sources:
                  src = source.get('src')
                  if not src or not isinstance(src, compat_str):
@@ -3422,12 +3423,16 @@ def _real_extract(self, url):
                  if src_type == 'video/youtube':
                      return self.url_result(src, YoutubeIE.ie_key())
                  if src_type == 'application/dash+xml' or ext == 'mpd':
-                    formats.extend(self._extract_mpd_formats(
-                        src, video_id, mpd_id='dash', fatal=False))
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(
+                        src, video_id, mpd_id='dash', fatal=False)
+                    formats.extend(fmts)
+                    self._merge_subtitles(subs, target=subtitles)
                  elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                          src, video_id, 'mp4', entry_protocol='m3u8_native',
-                        m3u8_id='hls', fatal=False))
+                        m3u8_id='hls', fatal=False)
+                    formats.extend(fmts)
+                    self._merge_subtitles(subs, target=subtitles)
                  else:
                      formats.append({
                          'url': src,
@@ -3437,9 +3442,10 @@ def _real_extract(self, url):
                              'Referer': full_response.geturl(),
                          },
                      })
-            if formats:
+            if formats or subtitles:
                  self._sort_formats(formats)
                  info_dict['formats'] = formats
+                info_dict['subtitles'] = subtitles
                  return info_dict
  
          # Looking for http://schema.org/VideoObject
@@ -3574,13 +3580,13 @@ def filter_video(urls):
  
              ext = determine_ext(video_url)
              if ext == 'smil':
-                entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
+                entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
              elif ext == 'xspf':
                  return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
              elif ext == 'm3u8':
-                entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+                entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
              elif ext == 'mpd':
-                entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+                entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
              elif ext == 'f4m':
                  entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
              elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
author	Felix S <redacted>
	Fri, 16 Jul 2021 14:22:56 +0000 (16:22 +0200)
committer	GitHub <redacted>
	Fri, 16 Jul 2021 14:22:56 +0000 (19:52 +0530)
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/extractor/generic.py		patch | blob | blame | history