jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/youtube.py
[extractor/youtube] Bring back `_extract_chapters_from_description`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
index e41e746489450e80263324396028aa1421218236..245778dff2d8be48556e671cdc2e54c4072e87f1 100644 (file)
@@ -2715,6 +2715,21 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                 for contents in content_list
             ))), [])
 
+    @staticmethod
+    def _extract_chapters_from_description(description, duration):
+        """Build a chapter list from timestamped lines of a description.
+
+        A line is treated as a chapter marker when it starts with a timestamp
+        of the form ``[H:]M:SS`` (one or two minute digits, two second
+        digits), followed by optional non-word characters, a whitespace
+        character and a non-empty title.
+
+        @param description  Description text to scan; a falsy value (e.g.
+                            None) is treated as an empty string.
+        @param duration     Upper bound for chapter start times and the
+                            ``end_time`` of the last chapter. May be None,
+                            in which case no upper bound is enforced.
+        @returns            List of dicts with ``start_time``, ``title`` and
+                            ``end_time`` keys, in order of appearance; empty
+                            when no usable marker was found.
+        """
+        # Sentinel entry so the first real chapter has a predecessor to
+        # compare against; it is sliced off before returning
+        chapters = [{'start_time': 0}]
+        for timestamp, title in re.findall(
+                r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''):
+            # parse_duration is defined elsewhere; presumably it returns the
+            # timestamp in seconds, or None when it cannot be parsed
+            start = parse_duration(timestamp)
+            # Entries with a falsy start (0 or None) are skipped, and start
+            # times must be strictly increasing
+            if not (start and title and chapters[-1]['start_time'] < start):
+                continue
+            # Comparing a number with None raises TypeError, so an unknown
+            # duration must bypass the upper-bound check instead
+            if duration is not None and start >= duration:
+                continue
+            # The previous chapter ends where this one begins
+            chapters[-1]['end_time'] = start
+            chapters.append({
+                'start_time': start,
+                'title': title,
+            })
+        chapters[-1]['end_time'] = duration
+        return chapters[1:]
+
     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
         chapters = []
         last_chapter = {'start_time': 0}
@@ -3668,6 +3683,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
             info['chapters'] = (
                 self._extract_chapters_from_json(initial_data, duration)
                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
+                or self._extract_chapters_from_description(video_description, duration)
                 or None)
 
         contents = traverse_obj(