jfr.im git - yt-dlp.git/commitdiff
[downloader/hls] Remove duplicate cues using a sliding window of candidates
author: Felix S <redacted>
Fri, 23 Apr 2021 08:52:21 +0000 (10:52 +0200)
committer: Felix S <redacted>
Wed, 28 Apr 2021 11:51:26 +0000 (17:21 +0530)
yt_dlp/downloader/hls.py
yt_dlp/webvtt.py

index cee3807ceb963b21f03a48cac87c9d3694532eb4..c0e52d35d7d6377190d7295da0eacdbf6ef8110b 100644 (file)
@@ -325,6 +325,31 @@ def pack_fragment(frag_content, frag_index):
                         if isinstance(block, webvtt.CueBlock):
                             block.start += adjust
                             block.end += adjust
+
+                            dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
+                            cue = block.as_json
+
+                            # skip the cue if an identical one appears
+                            # in the window of potential duplicates
+                            # and prune the window of unviable candidates
+                            i = 0
+                            skip = True
+                            while i < len(dedup_window):
+                                window_cue = dedup_window[i]
+                                if window_cue == cue:
+                                    break
+                                if window_cue['end'] >= cue['start']:
+                                    i += 1
+                                    continue
+                                del dedup_window[i]
+                            else:
+                                skip = False
+
+                            if skip:
+                                continue
+
+                            # add the cue to the window
+                            dedup_window.append(cue)
                         elif isinstance(block, webvtt.Magic):
                             # XXX: we do not handle MPEGTS overflow
                             if frag_index == 1:
index 4d026834a2282c7fee2bee94fc1324e0426d0d7d..a184ee369933343f49d68e4917f681f81082498e 100644 (file)
@@ -322,6 +322,16 @@ def write_into(self, stream):
         stream.write(self.text)
         stream.write('\n')
 
+    @property
+    def as_json(self):  # plain-dict view of this object's id/start/end/text/settings
+        return {  # a new dict is built on every access
+            'id': self.id,
+            'start': self.start,
+            'end': self.end,
+            'text': self.text,
+            'settings': self.settings,
+        }
+
 
 def parse_fragment(frag_content):
     """