jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rtvslo.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / rtvslo.py
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 parse_duration,
5 traverse_obj,
6 unified_timestamp,
7 url_or_none,
8 )
9
10
class RTVSLOIE(InfoExtractor):
    """Extractor for archive recordings on 365.rtvslo.si, 4d.rtvslo.si and rtvslo.si/rtv365."""

    IE_NAME = 'rtvslo.si'
    # Every literal dot in a host name is escaped so that it cannot match an
    # arbitrary character (e.g. "365.rtvsloXsi").
    _VALID_URL = r'''(?x)
        https?://(?:
            (?:365|4d)\.rtvslo\.si/arhiv/[^/?#&;]+|
            (?:www\.)?rtvslo\.si/rtv365/arhiv
        )/(?P<id>\d+)'''
    _GEO_COUNTRIES = ['SI']

    # Template filled with (API method name, recording ID)
    _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622'
    # Maps the language label used by the API to a language code
    SUB_LANGS_MAP = {'Slovenski': 'sl'}

    _TESTS = [
        {
            'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
            'info_dict': {
                'id': '174842550',
                'ext': 'flv',
                'release_timestamp': 1643140032,
                'upload_date': '20220125',
                'series': 'Dnevnik',
                'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg',
                'description': 'md5:76a18692757aeb8f0f51221106277dd2',
                'timestamp': 1643137046,
                'title': 'Dnevnik',
                'series_id': '92',
                'release_date': '20220125',
                'duration': 1789,
            },
        }, {
            'url': 'https://365.rtvslo.si/arhiv/utrip/174843754',
            'info_dict': {
                'id': '174843754',
                'ext': 'mp4',
                'series_id': '94',
                'release_date': '20220129',
                'timestamp': 1643484455,
                'title': 'Utrip',
                'duration': 813,
                'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg',
                'description': 'md5:77f2892630c7b17bb7a5bb84319020c9',
                'release_timestamp': 1643485825,
                'upload_date': '20220129',
                'series': 'Utrip',
            },
        }, {
            'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609',
            'info_dict': {
                'id': '174844609',
                'ext': 'mp3',
                'series_id': '106615841',
                'title': 'Il giornale della sera',
                'duration': 1328,
                'series': 'Il giornale della sera',
                'timestamp': 1643743800,
                'release_timestamp': 1643745424,
                'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg',
                'upload_date': '20220201',
                'tbr': 128000,
                'release_date': '20220201',
            },
        }, {
            'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
            'only_matching': True
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for the recording addressed by *url*.

        Two API calls are made: ``getRecordingDrm`` supplies the metadata and
        a JWT, and ``getMedia`` (queried with that JWT) supplies the streams.

        Raises:
            ExtractorError: if the metadata carries no JWT, or if the API
                marks the recording as an error stub that only serves the
                dummy clip.
        """
        v_id = self._match_id(url)
        meta = self._download_json(self._API_BASE.format('getRecordingDrm', v_id), v_id)['response']

        thumbs = [{'id': k, 'url': v, 'http_headers': {'Accept': 'image/jpeg'}}
                  for k, v in (meta.get('images') or {}).items()]

        subs = {}
        for s in traverse_obj(meta, 'subs', 'subtitles', default=[]):
            sub_url = s.get('file')
            if not sub_url:
                # An entry without a file URL cannot be fetched; do not emit
                # a subtitle track whose 'url' is None.
                continue
            # Unknown labels are passed through unchanged; a missing label becomes 'und'
            lang = self.SUB_LANGS_MAP.get(s.get('language'), s.get('language') or 'und')
            subs.setdefault(lang, []).append({
                'url': sub_url,
                'ext': traverse_obj(s, 'format', expected_type=str.lower),
            })

        jwt = meta.get('jwt')
        if not jwt:
            raise ExtractorError('Site did not provide an authentication token, cannot proceed.')

        media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']

        formats = []
        # NOTE: 'addaptiveMedia' is the key exactly as it is looked up in the API response
        adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
        if adaptive_url:
            formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil'])

        # Second adaptive stream: tagged as sign language interpretation and de-prioritised
        adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
        if adaptive_url:
            for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']):
                formats.append({
                    **f,
                    'format_id': 'sign-' + f['format_id'],
                    'format_note': 'Sign language interpretation',
                    'preference': -10,
                    # Formats with audio that are labelled 'eng' are relabelled 'slv'
                    # (presumably the manifest mislabels them - TODO confirm)
                    'language': (
                        'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none'
                        else f.get('language'))
                })

        # Direct media files: one format per (protocol, file) pair that has a stream URL
        formats.extend(
            {
                'url': f['streams'][strm],
                'ext': traverse_obj(f, 'mediaType', expected_type=str.lower),
                'width': f.get('width'),
                'height': f.get('height'),
                'tbr': f.get('bitrate'),
                'filesize': f.get('filesize'),
            }
            for strm in ('http', 'https')
            for f in media.get('mediaFiles') or []
            if traverse_obj(f, ('streams', strm))
        )

        # Placeholder clips in the returned URLs signal that the real media is unavailable
        if any('intermission.mp4' in x['url'] for x in formats):
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
        if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error':
            raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True)

        return {
            'id': v_id,
            'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))),
            'title': meta.get('title'),
            'formats': formats,
            'subtitles': subs,
            'thumbnails': thumbs,
            'description': meta.get('description'),
            'timestamp': unified_timestamp(traverse_obj(meta, 'broadcastDate', ('broadcastDates', 0))),
            'release_timestamp': unified_timestamp(meta.get('recordingDate')),
            'duration': meta.get('duration') or parse_duration(meta.get('length')),
            'tags': meta.get('genre'),
            'series': meta.get('showName'),
            'series_id': meta.get('showId'),
        }