+import hashlib
import json
import re
-from hashlib import sha256
+import urllib.parse
from .ffmpeg import FFmpegPostProcessor
-from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
-from ..utils import PostProcessingError, sanitized_Request
class SponsorBlockPP(FFmpegPostProcessor):
-
# https://wiki.sponsor.ajay.app/w/Types

# Extractor key -> name used for that site on the SponsorBlock side
# (NOTE(review): the lookup itself is outside this view - confirm usage).
EXTRACTORS = dict(Youtube='YouTube')

# Point-of-interest categories. Segments in these categories get their end
# pushed out by one second when segments are filtered, so they can be marked.
POI_CATEGORIES = dict(poi_highlight='Highlight')

# POI categories plus user-submitted chapters.
# (presumably: categories that are only ever marked, never cut - verify
# against the code that removes segments.)
NON_SKIPPABLE_CATEGORIES = dict(POI_CATEGORIES, chapter='Chapter')

# Every known category id mapped to its human-readable title; the title is
# what a chapter gets unless the segment is a 'chapter' with its own text.
# Insertion order is kept exactly: skippable categories first, then the
# non-skippable ones.
CATEGORIES = dict(
    sponsor='Sponsor',
    intro='Intermission/Intro Animation',
    outro='Endcards/Credits',
    selfpromo='Unpaid/Self Promotion',
    preview='Preview/Recap',
    filler='Filler Tangent',
    interaction='Interaction Reminder',
    music_offtopic='Non-Music Section',
    **NON_SKIPPABLE_CATEGORIES,
)
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
self.to_screen(f'SponsorBlock is not supported for {extractor}')
return [], info
- info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration'])
+ self.to_screen('Fetching SponsorBlock segments')
+ info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
return [], info
def _get_sponsor_chapters(self, info, duration):
def duration_filter(s):
    """Return True if SponsorBlock segment *s* should be kept for this video.

    ``s['segment']`` (a mutable ``[start, end]`` pair) is normalised IN PLACE:
    a start within the first second snaps to 0, POI segments are widened by
    one second, and an end within one second of (or beyond) the video
    duration snaps to the duration. The adjusted values are what the chapter
    conversion later reads back from ``s['segment']``.

    ``duration`` and ``self`` are taken from the enclosing scope. ``duration``
    may be ``None``/0 when the video length is unknown; the duration sanity
    check is then skipped instead of failing.
    """
    start_end = s['segment']
    # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).
    # start_end is item-assigned below, so it is a list; a list never
    # compares equal to a tuple, hence the explicit conversion.
    if tuple(start_end) == (0, 0):
        return False
    # Ignore milliseconds difference at the start.
    if start_end[0] <= 1:
        start_end[0] = 0
    # Make POI chapters 1 sec so that we can properly mark them
    if s['category'] in self.POI_CATEGORIES:
        start_end[1] += 1
    # Ignore milliseconds difference at the end.
    # Never allow the segment to exceed the video.
    if duration and duration - start_end[1] <= 1:
        start_end[1] = duration
    # SponsorBlock duration may be absent or it may deviate from the real one.
    # Without a known local duration there is nothing to compare against.
    if duration and s['videoDuration']:
        diff = abs(duration - s['videoDuration'])
    else:
        diff = 0
    if diff < 1:
        return True
    # Tolerate a deviation of up to 5 seconds as long as it is small (<5%)
    # relative to the segment itself. A segment with no positive length
    # cannot satisfy a relative bound (and must not divide by zero).
    length = start_end[1] - start_end[0]
    return diff < 5 and length > 0 and diff / length < 0.05
duration_match = [s for s in segments if duration_filter(s)]
if len(duration_match) != len(segments):
def to_chapter(s):
    """Build the chapter dict for one SponsorBlock segment *s*.

    The chapter title is the segment's own ``description`` for ``'chapter'``
    segments and the category's display name from ``self.CATEGORIES``
    otherwise. ``self`` comes from the enclosing scope.
    """
    bounds, category = s['segment'], s['category']
    begin, finish = bounds
    if category == 'chapter':
        label = s['description']
    else:
        label = self.CATEGORIES[category]
    chapter = {
        'start_time': begin,
        'end_time': finish,
        'category': category,
        'title': label,
        'type': s['actionType'],
        # One (category, start, end, title) record per source segment.
        '_categories': [(category, begin, finish, label)],
    }
    return chapter
sponsor_chapters = [to_chapter(s) for s in duration_match]
if not sponsor_chapters:
- self.to_screen('No segments were found in the SponsorBlock database')
+ self.to_screen('No matching segments were found in the SponsorBlock database')
else:
self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
return sponsor_chapters
def _get_sponsor_segments(self, video_id, service):
    """Fetch the SponsorBlock segments recorded for *video_id*.

    The API is queried by the first four hex characters of the SHA-256 of
    the video id, so the response may list several videos; only the entry
    whose ``videoID`` equals *video_id* is used.

    Returns that entry's ``segments`` value, or an empty list when the
    response is empty/falsy or contains no matching entry.
    """
    digest = hashlib.sha256(video_id.encode('ascii')).hexdigest()
    # SponsorBlock API recommends using first 4 hash characters.
    prefix = f'{self._API_URL}/api/skipSegments/{digest[:4]}?'
    params = {
        'service': service,
        'categories': json.dumps(self._categories),
        'actionTypes': json.dumps(['skip', 'poi', 'chapter']),
    }
    endpoint = prefix + urllib.parse.urlencode(params)
    entries = self._download_json(endpoint) or []
    # Lazily pick the first entry for this exact video; default to no segments.
    matching = (entry['segments'] for entry in entries if entry['videoID'] == video_id)
    return next(matching, [])
-
- def _get_json(self, url):
- self.write_debug(f'SponsorBlock query: {url}')
- try:
- rsp = self._downloader.urlopen(sanitized_Request(url))
- except compat_HTTPError as e:
- if e.code == 404:
- return []
- raise PostProcessingError(f'Error communicating with SponsorBlock API - {e}')
-
- return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))