[extractor/youtube] Bring back `_extract_chapters_from_description`

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index e41e746489450e80263324396028aa1421218236..245778dff2d8be48556e671cdc2e54c4072e87f1 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2715,6 +2715,21 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                  for contents in content_list
              ))), [])
  
+    @staticmethod
+    def _extract_chapters_from_description(description, duration):
+        """Build a chapter list from timestamp lines in a video description.
+
+        A line is treated as a chapter marker when it begins with a timestamp
+        (minutes:seconds, optionally preceded by hours) followed by a title.
+        A marker is accepted only if its start time is greater than that of
+        the previously accepted marker and less than `duration`; every other
+        line is ignored.
+
+        description: the description text; None is treated as empty
+        duration:    the video length in seconds, or None if unknown
+
+        Returns a list of dicts with 'start_time', 'title' and 'end_time'.
+        The list is empty if no usable marker is found or if `duration`
+        is None.
+        """
+        if duration is None:
+            # Without a duration the timestamps cannot be range-checked
+            # (ordering a number against None raises TypeError) and the
+            # last chapter cannot be given an end time
+            return []
+        # Placeholder for the start of the video. It only provides the lower
+        # bound for the first marker and is dropped from the result
+        chapters = [{'start_time': 0}]
+        for timestamp, title in re.findall(
+                r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''):
+            start = parse_duration(timestamp)
+            # A falsy start (unparsable or zero) is skipped, as is any
+            # timestamp that is out of order or past the end of the video
+            if start and title and chapters[-1]['start_time'] < start < duration:
+                # Each chapter ends where the next one begins
+                chapters[-1]['end_time'] = start
+                chapters.append({
+                    'start_time': start,
+                    'title': title,
+                })
+        # The final chapter runs to the end of the video
+        chapters[-1]['end_time'] = duration
+        return chapters[1:]
+
      def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
          chapters = []
          last_chapter = {'start_time': 0}
@@ -3668,6 +3683,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
              info['chapters'] = (
                  self._extract_chapters_from_json(initial_data, duration)
                  or self._extract_chapters_from_engagement_panel(initial_data, duration)
+                or self._extract_chapters_from_description(video_description, duration)
                  or None)
  
          contents = traverse_obj(