Improve chapter sanitization

author pukkandan <redacted>

Thu, 7 Jul 2022 05:21:47 +0000 (10:51 +0530)

committer pukkandan <redacted>

Thu, 7 Jul 2022 05:58:56 +0000 (11:28 +0530)
author pukkandan <redacted>
Thu, 7 Jul 2022 05:21:47 +0000 (10:51 +0530)
committer pukkandan <redacted>
Thu, 7 Jul 2022 05:58:56 +0000 (11:28 +0530)
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 50b85cbfef72e2f67536794af7f9715e53b24505..38d146bfc70e0517d219a80ef9e4be46d5f84fb4 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2377,13 +2377,18 @@ def sanitize_numeric_fields(info):
              self.report_warning('"duration" field is negative, there is an error in extractor')
  
          chapters = info_dict.get('chapters') or []
+        if chapters and chapters[0].get('start_time'):
+            chapters.insert(0, {'start_time': 0})
+
          dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
-        for prev, current, next_ in zip(
-                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)):
+        for idx, (prev, current, next_) in enumerate(zip(
+                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
              if current.get('start_time') is None:
                  current['start_time'] = prev.get('end_time')
              if not current.get('end_time'):
                  current['end_time'] = next_.get('start_time')
+            if not current.get('title'):
+                current['title'] = f'<Untitled Chapter {idx}>'
  
          if 'playlist' not in info_dict:
              # It isn't part of a playlist
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 3e2ac030ea8791b5b86e742228f9ec8821565571..90d2435de96673506d2b72bd033b178620c62f9d 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2764,17 +2764,15 @@ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration,
          if not strict:
              chapter_list.sort(key=lambda c: c['start_time'] or 0)
  
-        chapters = [{'start_time': 0, 'title': '<Untitled>'}]
+        chapters = [{'start_time': 0}]
          for idx, chapter in enumerate(chapter_list):
-            if chapter['start_time'] is None or not chapter['title']:
+            if chapter['start_time'] is None:
                  self.report_warning(f'Incomplete chapter {idx}')
              elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
-                chapters[-1]['end_time'] = chapter['start_time']
                  chapters.append(chapter)
              else:
                  self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
-        chapters[-1]['end_time'] = duration
-        return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
+        return chapters[1:]
  
      def _extract_comment(self, comment_renderer, parent=None):
          comment_id = comment_renderer.get('commentId')
author	pukkandan <redacted>
	Thu, 7 Jul 2022 05:21:47 +0000 (10:51 +0530)
committer	pukkandan <redacted>
	Thu, 7 Jul 2022 05:58:56 +0000 (11:28 +0530)
yt_dlp/YoutubeDL.py		patch | blob | blame | history
yt_dlp/extractor/youtube.py		patch | blob | blame | history