]>
Commit | Line | Data |
---|---|---|
ef58c476 | 1 | from hashlib import sha256 |
2 | import itertools | |
7a340e0d NA |
3 | import json |
4 | import re | |
ef58c476 | 5 | import time |
7a340e0d NA |
6 | |
7 | from .ffmpeg import FFmpegPostProcessor | |
8 | from ..compat import compat_urllib_parse_urlencode, compat_HTTPError | |
e6f21b3d | 9 | from ..utils import PostProcessingError, network_exceptions, sanitized_Request |
7a340e0d NA |
10 | |
11 | ||
class SponsorBlockPP(FFmpegPostProcessor):
    """Post-processor that looks up SponsorBlock segments for a video.

    `run` queries the SponsorBlock API and stores the matching segments, converted
    to chapter-like dicts, in `info['sponsorblock_chapters']`.
    """

    # https://wiki.sponsor.ajay.app/w/Types
    # Extractor key -> service name sent to the SponsorBlock API
    EXTRACTORS = {
        'Youtube': 'YouTube',
    }
    # Categories whose segments get stretched by 1 second in _get_sponsor_chapters
    POI_CATEGORIES = {
        'poi_highlight': 'Highlight',
    }
    # Category id -> human readable chapter title
    CATEGORIES = {
        'sponsor': 'Sponsor',
        'intro': 'Intermission/Intro Animation',
        'outro': 'Endcards/Credits',
        'selfpromo': 'Unpaid/Self Promotion',
        'preview': 'Preview/Recap',
        'filler': 'Filler Tangent',
        'interaction': 'Interaction Reminder',
        'music_offtopic': 'Non-Music Section',
        **POI_CATEGORIES,
    }

    def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
        """Set up the post-processor.

        downloader -- forwarded unchanged to FFmpegPostProcessor.__init__
        categories -- iterable of category ids to request; every key of
                      CATEGORIES is requested when this is empty or None
        api        -- base URL of the SponsorBlock API; 'https://' is prepended
                      when no http(s) scheme is given
        """
        FFmpegPostProcessor.__init__(self, downloader)
        self._categories = tuple(categories or self.CATEGORIES)
        self._API_URL = api if re.match(r'^https?://', api) else 'https://' + api

    def run(self, info):
        """Fetch the segments for `info` and store them in info['sponsorblock_chapters'].

        Returns a tuple of an empty list and the (possibly updated) info dict.
        For extractors missing from EXTRACTORS, info is returned unchanged.
        """
        extractor = info['extractor_key']
        if extractor not in self.EXTRACTORS:
            self.to_screen(f'SponsorBlock is not supported for {extractor}')
            return [], info

        self.to_screen('Fetching SponsorBlock segments')
        # The duration may be missing; _get_sponsor_chapters treats a falsy duration as unknown
        info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
        return [], info

    def _get_sponsor_chapters(self, info, duration):
        """Return the SponsorBlock segments of the video as a list of chapter dicts.

        info     -- info dict; 'id' and 'extractor_key' are read
        duration -- duration of the video in the same unit as the segment times,
                    or a falsy value when it is not known

        Segments whose recorded video duration differs from `duration` by more
        than 1 are dropped and a warning is reported.
        """
        segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])

        # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).
        # The segment is decoded from JSON and so is a list; convert it before comparing
        # since a list never compares equal to a tuple. These are dropped up front so that
        # they do not trigger the unrelated "different duration" warning below
        segments = [s for s in segments if tuple(s['segment']) != (0, 0)]

        def duration_filter(s):
            # NB: The segment is adjusted in-place; to_chapter reads the adjusted values
            start_end = s['segment']
            # Ignore milliseconds difference at the start.
            if start_end[0] <= 1:
                start_end[0] = 0
            # Make POI chapters 1 sec so that we can properly mark them
            if s['category'] in self.POI_CATEGORIES:
                start_end[1] += 1
            # Ignore milliseconds difference at the end.
            # Never allow the segment to exceed the video.
            if duration and duration - start_end[1] <= 1:
                start_end[1] = duration
            # SponsorBlock duration may be absent or it may deviate from the real one.
            video_duration = s.get('videoDuration')
            return not video_duration or not duration or abs(duration - video_duration) <= 1

        duration_match = [s for s in segments if duration_filter(s)]
        if len(duration_match) != len(segments):
            self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')

        def to_chapter(s):
            (start, end), cat = s['segment'], s['category']
            return {
                'start_time': start,
                'end_time': end,
                'category': cat,
                'title': self.CATEGORIES[cat],
                '_categories': [(cat, start, end)]
            }

        sponsor_chapters = [to_chapter(s) for s in duration_match]
        if not sponsor_chapters:
            self.to_screen('No segments were found in the SponsorBlock database')
        else:
            self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
        return sponsor_chapters

    def _get_sponsor_segments(self, video_id, service):
        """Return the raw segment list the API has for `video_id`, or [] if there is none.

        The API is queried with a hash prefix and so may answer with entries for
        several videos; only the entry whose 'videoID' equals `video_id` is used.
        """
        # Not named "hash" so that the builtin is not shadowed
        video_hash = sha256(video_id.encode('ascii')).hexdigest()
        # SponsorBlock API recommends using first 4 hash characters.
        url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' + compat_urllib_parse_urlencode({
            'service': service,
            'categories': json.dumps(self._categories),
            'actionTypes': json.dumps(['skip', 'poi'])
        })
        self.write_debug(f'SponsorBlock query: {url}')
        for d in self._get_json(url):
            if d['videoID'] == video_id:
                return d['segments']
        return []

    def _get_json(self, url):
        """Download `url` and return the decoded JSON document.

        An HTTP 404 response yields []. Any other network error is retried;
        once the retries are used up, PostProcessingError is raised.
        """
        # While this is not an extractor, it behaves similar to one and
        # so obey extractor_retries and sleep_interval_requests
        max_retries = self.get_param('extractor_retries', 3)
        sleep_interval = self.get_param('sleep_interval_requests') or 0
        for retries in itertools.count():
            try:
                rsp = self._downloader.urlopen(sanitized_Request(url))
                # Fall back to UTF-8 when the response does not declare a charset
                return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
            except network_exceptions as e:
                if isinstance(e, compat_HTTPError) and e.code == 404:
                    return []
                if retries < max_retries:
                    self.report_warning(f'{e}. Retrying...')
                    if sleep_interval > 0:
                        self.to_screen(f'Sleeping {sleep_interval} seconds ...')
                        time.sleep(sleep_interval)
                    continue
                raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}')