]> jfr.im git - yt-dlp.git/blame - yt_dlp/postprocessor/sponsorblock.py
[cleanup Misc
[yt-dlp.git] / yt_dlp / postprocessor / sponsorblock.py
CommitLineData
e5a998f3 1import hashlib
7a340e0d
NA
2import json
3import re
14f25df2 4import urllib.parse
7a340e0d
NA
5
6from .ffmpeg import FFmpegPostProcessor
7a340e0d
NA
7
8
class SponsorBlockPP(FFmpegPostProcessor):
    """Post-processor that looks up SponsorBlock segments for a video.

    The matching segments are converted to chapter-like dicts and stored in
    ``info['sponsorblock_chapters']``; no files are modified by this class.
    """

    # https://wiki.sponsor.ajay.app/w/Types
    # Maps the extractor key found in the info dict to the SponsorBlock service name.
    EXTRACTORS = {
        'Youtube': 'YouTube',
    }
    # Point-of-interest categories: a single timestamp rather than a range.
    POI_CATEGORIES = {
        'poi_highlight': 'Highlight',
    }
    NON_SKIPPABLE_CATEGORIES = {
        **POI_CATEGORIES,
        'chapter': 'Chapter',
    }
    # Every known category mapped to its human readable title.
    CATEGORIES = {
        'sponsor': 'Sponsor',
        'intro': 'Intermission/Intro Animation',
        'outro': 'Endcards/Credits',
        'selfpromo': 'Unpaid/Self Promotion',
        'preview': 'Preview/Recap',
        'filler': 'Filler Tangent',
        'interaction': 'Interaction Reminder',
        'music_offtopic': 'Non-Music Section',
        **NON_SKIPPABLE_CATEGORIES,
    }

    def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
        """Store the categories to request and the normalized API base URL.

        @param downloader   Passed through to FFmpegPostProcessor.
        @param categories   Iterable of category keys; all of CATEGORIES when falsy.
        @param api          API base URL; 'https://' is prepended if no scheme is given.
        """
        FFmpegPostProcessor.__init__(self, downloader)
        self._categories = tuple(categories or self.CATEGORIES)
        self._API_URL = api if re.match(r'^https?://', api) else 'https://' + api

    def run(self, info):
        """Attach SponsorBlock chapters to `info` for supported extractors.

        Returns the tuple ``([], info)`` in every case.
        """
        extractor = info['extractor_key']
        if extractor not in self.EXTRACTORS:
            self.to_screen(f'SponsorBlock is not supported for {extractor}')
            return [], info

        self.to_screen('Fetching SponsorBlock segments')
        info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
        return [], info

    def _get_sponsor_chapters(self, info, duration):
        """Fetch the segments for `info` and convert the usable ones to chapters.

        @param info      Info dict; 'id' and 'extractor_key' are read.
        @param duration  Duration of the video in seconds, or None when unknown.
        @returns         List of chapter dicts (possibly empty).
        """
        segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])

        def duration_filter(s):
            # NB: the segment boundaries are adjusted in place
            start_end = s['segment']
            # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).
            # The segment comes from JSON and is therefore a list, which never
            # compares equal to a tuple; convert before comparing
            if tuple(start_end) == (0, 0):
                return False
            # Ignore milliseconds difference at the start.
            if start_end[0] <= 1:
                start_end[0] = 0
            # Make POI chapters 1 sec so that we can properly mark them
            if s['category'] in self.POI_CATEGORIES:
                start_end[1] += 1
            # Ignore milliseconds difference at the end.
            # Never allow the segment to exceed the video.
            if duration and duration - start_end[1] <= 1:
                start_end[1] = duration
            # SponsorBlock duration may be absent or it may deviate from the real one.
            # Without both durations there is nothing to compare, so accept the segment
            reported_duration = s.get('videoDuration')
            diff = abs(duration - reported_duration) if duration and reported_duration else 0
            if diff < 1:
                return True
            # A zero-length segment would otherwise divide by zero; together with
            # negative-length ones it is treated as not matching this video
            length = start_end[1] - start_end[0]
            return diff < 5 and length > 0 and diff / length < 0.05

        duration_match = [s for s in segments if duration_filter(s)]
        if len(duration_match) != len(segments):
            self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')

        def to_chapter(s):
            (start, end), cat = s['segment'], s['category']
            # Chapter segments carry their own title; others use the category name
            title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]
            return {
                'start_time': start,
                'end_time': end,
                'category': cat,
                'title': title,
                'type': s['actionType'],
                '_categories': [(cat, start, end, title)],
            }

        sponsor_chapters = [to_chapter(s) for s in duration_match]
        if not sponsor_chapters:
            self.to_screen('No matching segments were found in the SponsorBlock database')
        else:
            self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
        return sponsor_chapters

    def _get_sponsor_segments(self, video_id, service):
        """Return the raw segment list for `video_id`, or [] if there is none.

        Only a prefix of the video ID's SHA-256 hash is sent to the API; the
        response may therefore cover several videos and is filtered by ID here.
        """
        video_hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
        # SponsorBlock API recommends using first 4 hash characters.
        url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' + urllib.parse.urlencode({
            'service': service,
            'categories': json.dumps(self._categories),
            'actionTypes': json.dumps(['skip', 'poi', 'chapter']),
        })
        for d in self._download_json(url) or []:
            if d['videoID'] == video_id:
                return d['segments']
        return []