]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/hse.py
Tolerate failure to `--write-link` due to unknown URL
[yt-dlp.git] / yt_dlp / extractor / hse.py
CommitLineData
ae43a4b9 1# coding: utf-8
2from .common import InfoExtractor
3from ..utils import (
4 ExtractorError,
5 traverse_obj,
6 unified_timestamp,
7)
8
9
class HSEShowBaseInfoExtractor(InfoExtractor):
    """Shared helpers for the hse.de extractors.

    Offers retrieval of the ``window.__REDUX_DATA__`` JSON object embedded in
    a page, and conversion of a list of source entries into formats and
    subtitles.
    """

    _GEO_COUNTRIES = ['DE']

    def _extract_redux_data(self, url, video_id):
        """Download *url* and return the parsed ``window.__REDUX_DATA__`` value.

        The object literal assigned to ``window.__REDUX_DATA__`` is captured
        greedily, has its newlines removed and is then parsed as JSON.
        """
        webpage = self._download_webpage(url, video_id)
        redux = self._html_search_regex(
            r'window\.__REDUX_DATA__\s*=\s*({.*});?', webpage, 'redux data')
        # NOTE(review): newlines are stripped before parsing; presumably they
        # are introduced by the HTML clean-up of the matched text - confirm
        return self._parse_json(redux.replace('\n', ''), video_id)

    def _extract_formats_and_subtitles(self, sources, video_id):
        """Build formats and subtitles from the HLS entries of *sources*.

        Entries whose ``mimetype`` is not ``application/x-mpegURL``, or which
        carry no ``url``, are skipped.

        Returns a ``(formats, subtitles)`` tuple.
        Raises ExtractorError (expected) when *sources* is empty or missing.
        """
        if not sources:
            raise ExtractorError('No video found', expected=True, video_id=video_id)
        formats, subtitles = [], {}
        for src in sources:
            # .get() keeps one incomplete entry from aborting the whole
            # extraction with a KeyError; such entries are simply ignored
            if src.get('mimetype') != 'application/x-mpegURL':
                continue
            m3u8_url = src.get('url')
            if not m3u8_url:
                continue
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4')
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        self._sort_formats(formats)
        return formats, subtitles
31
32
class HSEShowIE(HSEShowBaseInfoExtractor):
    """Extractor for TV show pages at ``hse.de/dpl/c/tv-shows/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/c/tv-shows/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hse.de/dpl/c/tv-shows/505350',
        'info_dict': {
            'id': '505350',
            'ext': 'mp4',
            'title': 'Pfeffinger Mode & Accessoires',
            'timestamp': 1638810000,
            'upload_date': '20211206',
            'channel': 'HSE24',
            'uploader': 'Arina Pirayesh'
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for the show page at *url*.

        Formats and subtitles come from the sources listed under
        ``tvShowPage.tvShowVideo``; the remaining metadata is read from
        ``tvShowPage.tvShow`` and falls back to ``None`` (or to the video id
        for the title) when absent.
        """
        video_id = self._match_id(url)
        json_data = self._extract_redux_data(url, video_id)
        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(json_data, ('tvShowPage', 'tvShowVideo', 'sources')), video_id)

        show = traverse_obj(json_data, ('tvShowPage', 'tvShow')) or {}
        return {
            'id': video_id,
            'title': show.get('title') or video_id,
            'formats': formats,
            # The timestamp is assembled from the show's "date" and "hour" fields
            'timestamp': unified_timestamp(f'{show.get("date")} {show.get("hour")}:00'),
            # The sources are read from tvShowPage.tvShowVideo, so the poster is
            # presumably stored alongside them; the former top-level path is
            # kept as a fallback
            'thumbnail': traverse_obj(
                json_data, ('tvShowPage', 'tvShowVideo', 'poster'), ('tvShowVideo', 'poster')),
            # The third argument is the field name used in the warning emitted
            # on a failed match, so it must describe the field, not the video id
            'channel': self._search_regex(
                r'tvShow \| ([A-Z0-9]+)_', show.get('actionFieldText') or '', 'channel', fatal=False),
            'uploader': show.get('presenter'),
            'subtitles': subtitles,
        }
67
68
class HSEProductIE(HSEShowBaseInfoExtractor):
    """Extractor for product pages at ``hse.de/dpl/p/product/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/p/product/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hse.de/dpl/p/product/408630',
        'info_dict': {
            'id': '408630',
            'ext': 'mp4',
            'title': 'Hose im Ponte-Mix',
            'uploader': 'Judith Williams',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for the first video listed on the product page.

        The title falls back to the product id when the short product name is
        not available.
        """
        product_id = self._match_id(url)
        redux_data = self._extract_redux_data(url, product_id)

        # Only the first entry of the product's video list is considered
        first_video = traverse_obj(
            redux_data, ('productContent', 'productContent', 'videos', 0))
        if not first_video:
            first_video = {}
        formats, subtitles = self._extract_formats_and_subtitles(
            first_video.get('sources'), product_id)

        short_name = traverse_obj(
            redux_data, ('productDetail', 'product', 'name', 'short'))
        brand_name = traverse_obj(
            redux_data, ('productDetail', 'product', 'brand', 'brandName'))

        info = {
            'id': product_id,
            'title': short_name or product_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': first_video.get('poster'),
            'uploader': brand_name,
        }
        return info