]>
Commit | Line | Data |
---|---|---|
0783fd55 | 1 | import json |
2 | import uuid | |
a2d821d7 | 3 | |
1335c3ac S |
4 | from .common import InfoExtractor |
5 | from ..utils import ( | |
6 | ExtractorError, | |
edfc7725 | 7 | int_or_none, |
6066d03d | 8 | mimetype2ext, |
0783fd55 | 9 | parse_iso8601, |
10 | try_call, | |
2c15db82 | 11 | update_url_query, |
a2d821d7 | 12 | url_or_none, |
1335c3ac | 13 | ) |
0783fd55 | 14 | from ..utils.traversal import traverse_obj |
f2b8db57 | 15 | |
ab4cbeff FNJS |
# URL template for the dr-massive "page" API; `%s` receives the site-relative
# page path (the season/series extractors below pass '/saeson/...' and '/serie/...').
SERIES_API = (
    'https://production-cdn.dr-massive.com/api/page'
    '?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s'
)
17 | ||
18 | ||
class DRTVIE(InfoExtractor):
    # Extractor for a single DRTV item (old dr.dk/tv/se/... URLs and the newer
    # /drtv/se|episode|program/ URLs on dr.dk and dr-massive.com).
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?dr\.dk/tv/se(?:/ondemand)?/(?:[^/?#]+/)*|
                            (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
                        )
                        (?P<id>[\da-z_-]+)
                    '''
    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['DK']
    IE_NAME = 'drtv'
    _TESTS = [{
        'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
        'md5': '25e659cccc9a2ed956110a299fdf5983',
        'info_dict': {
            'id': 'klassen-darlig-taber-10',
            'ext': 'mp4',
            'title': 'Klassen - Dårlig taber (10)',
            'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
            'timestamp': 1539085800,
            'upload_date': '20181009',
            'duration': 606.84,
            'series': 'Klassen',
            'season': 'Klassen I',
            'season_number': 1,
            'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
            'episode': 'Episode 10',
            'episode_number': 10,
            'release_year': 2016,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
        'skip': 'this video has been removed',
    }, {
        # with SignLanguage formats
        'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
        'info_dict': {
            'id': '00831690010',
            'ext': 'mp4',
            'title': 'Historien om Danmark: Stenalder',
            'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
            'timestamp': 1546628400,
            'upload_date': '20190104',
            'duration': 3504.619,
            'formats': 'mincount:20',
            'release_year': 2017,
            'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
            'season_number': 1,
            'season': 'Historien om Danmark',
            'series': 'Historien om Danmark',
        },
        'skip': 'this video has been removed',
    }, {
        'url': 'https://www.dr.dk/drtv/se/frank-and-kastaniegaarden_71769',
        'info_dict': {
            'id': '00951930010',
            'ext': 'mp4',
            'title': 'Frank & Kastaniegaarden',
            'description': 'md5:974e1780934cf3275ef10280204bccb0',
            'release_timestamp': 1546545600,
            'release_date': '20190103',
            'duration': 2576,
            'season': 'Frank & Kastaniegaarden',
            'season_id': '67125',
            'release_year': 2019,
            'season_number': 2019,
            'series': 'Frank & Kastaniegaarden',
            'episode_number': 1,
            'episode': 'Frank & Kastaniegaarden',
            'thumbnail': r're:https?://.+',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Foreign and Regular subtitle track
        'url': 'https://www.dr.dk/drtv/se/spise-med-price_-pasta-selv_397445',
        'info_dict': {
            'id': '00212301010',
            'ext': 'mp4',
            'episode_number': 1,
            'title': 'Spise med Price: Pasta Selv',
            'alt_title': '1. Pasta Selv',
            'release_date': '20230807',
            'description': 'md5:2da9060524fed707810d71080b3d0cd8',
            'duration': 1750,
            'season': 'Spise med Price',
            'release_timestamp': 1691438400,
            'season_id': '397440',
            'episode': 'Spise med Price: Pasta Selv',
            'thumbnail': r're:https?://.+',
            'season_number': 15,
            'series': 'Spise med Price',
            'release_year': 2022,
            'subtitles': 'mincount:2',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
        'only_matching': True,
    }, {
        'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
        'only_matching': True,
    }, {
        'url': 'https://www.dr.dk/drtv/program/jagten_220924',
        'only_matching': True,
    }]

    # Maps the API's subtitle "language" labels to the language codes used as
    # keys of the returned subtitles dict; unknown labels are used verbatim.
    SUBTITLE_LANGS = {
        'DanishLanguageSubtitles': 'da',
        'ForeignLanguageSubtitles': 'da_foreign',
        'CombinedLanguageSubtitles': 'da_combined',
    }

    # Anonymous bearer token; fetched once in _real_initialize and then reused
    _TOKEN = None

    def _real_initialize(self):
        """Obtain an anonymous SSO token unless one is already stored.

        Raises ExtractorError when the response holds no 'UserAccount' token.
        """
        if self._TOKEN:
            return

        token_response = self._download_json(
            'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
            note='Downloading anonymous token', headers={
                'content-type': 'application/json',
            }, query={
                'device': 'web_browser',
                'ff': 'idp,ldp,rpt',
                'lang': 'da',
                'supportFallbackToken': 'true',
            }, data=json.dumps({
                # A fresh random device id is sent with every token request
                'deviceId': str(uuid.uuid4()),
                'scopes': ['Catalog'],
                'optout': True,
            }).encode())

        # Pick the first entry of type 'UserAccount' whose value is a string
        self._TOKEN = traverse_obj(
            token_response, (lambda _, x: x['type'] == 'UserAccount', 'value', {str}), get_all=False)
        if not self._TOKEN:
            raise ExtractorError('Unable to get anonymous token')

    def _real_extract(self, url):
        """Return the info dict (formats, subtitles, metadata) for one item.

        Item metadata is read from the `window.__data` JSON embedded in the
        webpage; when that yields nothing, the items API is queried using the
        numeric suffix of the URL slug. Geo-restriction is raised when no
        formats were found and the item's season is flagged as restricted.
        """
        url_slug = self._match_id(url)
        webpage = self._download_webpage(url, url_slug)

        json_data = self._search_json(
            r'window\.__data\s*=', webpage, 'data', url_slug, fatal=False) or {}
        # The item sits either directly on a cached page or on its first entry
        item = traverse_obj(
            json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item', {dict}), get_all=False)
        if item:
            item_id = item.get('id')
        else:
            # Fallback: the part after the last '_' in the slug is used as item id
            item_id = url_slug.rsplit('_', 1)[-1]
            item = self._download_json(
                f'https://production-cdn.dr-massive.com/api/items/{item_id}', item_id,
                note='Attempting to download backup item data', query={
                    'device': 'web_browser',
                    'expand': 'all',
                    'ff': 'idp,ldp,rpt',
                    'geoLocation': 'dk',
                    'isDeviceAbroad': 'false',
                    'lang': 'da',
                    'segments': 'drtv,optedout',
                    'sub': 'Anonymous',
                })

        # Prefer the last ':'-separated part of customId; fall back to the item id
        video_id = try_call(lambda: item['customId'].rsplit(':', 1)[-1]) or item_id
        stream_data = self._download_json(
            f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id,
            note='Downloading stream data', query={
                'delivery': 'stream',
                'device': 'web_browser',
                'ff': 'idp,ldp,rpt',
                'lang': 'da',
                'resolution': 'HD-1080',
                'sub': 'Anonymous',
            }, headers={'authorization': f'Bearer {self._TOKEN}'})

        formats = []
        subtitles = {}
        for stream in traverse_obj(stream_data, (lambda _, x: x['url'])):
            # `or` (not a .get default) so an explicit null "format" also falls
            # back to 'na' instead of breaking the string concatenation below
            format_id = stream.get('format') or 'na'
            access_service = stream.get('accessService')
            preference = None
            subtitle_suffix = ''
            if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
                # Accessibility variants are ranked below the standard video and
                # get their own format id / subtitle language suffix
                preference = -1
                format_id += f'-{access_service}'
                subtitle_suffix = f'-{access_service}'
            elif access_service == 'StandardVideo':
                preference = 1
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                stream.get('url'), video_id, ext='mp4', preference=preference, m3u8_id=format_id, fatal=False)
            formats.extend(fmts)

            # Subtitles listed by the API take precedence; the ones found in the
            # m3u8 manifest are only used when the API lists none for this stream
            api_subtitles = traverse_obj(stream, ('subtitles', lambda _, v: url_or_none(v['link']), {dict}))
            if not api_subtitles:
                self._merge_subtitles(subs, target=subtitles)

            for sub_track in api_subtitles:
                lang = sub_track.get('language') or 'da'
                subtitles.setdefault(self.SUBTITLE_LANGS.get(lang, lang) + subtitle_suffix, []).append({
                    'url': sub_track['link'],
                    'ext': mimetype2ext(sub_track.get('format')) or 'vtt',
                })

        if not formats and traverse_obj(item, ('season', 'customFields', 'IsGeoRestricted')):
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(item, {
                'title': 'title',
                'alt_title': 'contextualTitle',
                'description': 'description',
                'thumbnail': ('images', 'wallpaper'),
                'release_timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}),
                'duration': ('duration', {int_or_none}),
                'series': ('season', 'show', 'title'),
                'season': ('season', 'title'),
                'season_number': ('season', 'seasonNumber', {int_or_none}),
                'season_id': 'seasonId',
                'episode': 'episodeName',
                'episode_number': ('episodeNumber', {int_or_none}),
                'release_year': ('releaseYear', {int_or_none}),
            }),
        }
2c15db82 RA |
249 | |
250 | ||
class DRTVLiveIE(InfoExtractor):
    # Extractor for DR live channels (dr.dk/tv/live/<channel>)
    IE_NAME = 'drtv:live'
    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
    _GEO_COUNTRIES = ['DK']
    _TEST = {
        'url': 'https://www.dr.dk/tv/live/dr1',
        'info_dict': {
            'id': 'dr1',
            'ext': 'mp4',
            'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the live info dict from the channel's mu-online API entry."""
        channel_id = self._match_id(url)
        channel_data = self._download_json(
            f'https://www.dr.dk/mu-online/api/1.0/channel/{channel_id}', channel_id)
        # The title is mandatory: a missing 'Title' key aborts extraction here
        title = channel_data['Title']

        formats = []
        for server_entry in channel_data.get('StreamingServers', []):
            base_url = server_entry.get('Server')
            if not base_url:
                continue
            protocol = server_entry.get('LinkType')
            for quality in server_entry.get('Qualities', []):
                for stream in quality.get('Streams', []):
                    path = stream.get('Stream')
                    if not path:
                        continue
                    joined_url = f'{base_url}/{path}'
                    stream_url = update_url_query(joined_url, {'b': ''})
                    if protocol == 'HLS':
                        hls_formats = self._extract_m3u8_formats(
                            stream_url, channel_id, 'mp4',
                            m3u8_id=protocol, fatal=False, live=True)
                        formats.extend(hls_formats)
                    elif protocol == 'HDS':
                        # HDS manifests are requested with an 'hdcore' query instead of 'b'
                        manifest_url = update_url_query(joined_url, {'hdcore': '3.7.0'})
                        formats.extend(self._extract_f4m_formats(
                            manifest_url, channel_id, f4m_id=protocol, fatal=False))

        return {
            'id': channel_id,
            'title': title,
            'thumbnail': channel_data.get('PrimaryImageUri'),
            'formats': formats,
            'is_live': True,
        }
ab4cbeff FNJS |
304 | |
305 | ||
class DRTVSeasonIE(InfoExtractor):
    # Playlist extractor for one DRTV season (/drtv/saeson/<slug>_<id>)
    IE_NAME = 'drtv:season'
    _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/saeson/(?P<display_id>[\w-]+)_(?P<id>\d+)'
    _GEO_COUNTRIES = ['DK']
    _TESTS = [{
        'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_9008',
        'info_dict': {
            'id': '9008',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
            'season_number': 2008,
            'alt_title': 'Season 2008',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761',
        'info_dict': {
            'id': '8761',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
            'season_number': 2009,
            'alt_title': 'Season 2009',
        },
        'playlist_mincount': 19,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are url results handed to DRTVIE.

        Episodes without a usable 'path' are skipped, and a page without an
        episode list yields an empty playlist instead of raising.
        """
        display_id, season_id = self._match_valid_url(url).group('display_id', 'id')
        data = self._download_json(SERIES_API % f'/saeson/{display_id}_{season_id}', display_id)

        # Everything of interest hangs off the first entry's item; resolve it once
        season_item = traverse_obj(data, ('entries', 0, 'item', {dict})) or {}
        series_title = season_item.get('title')
        season_number = season_item.get('seasonNumber')

        # The filter makes this a branching traversal: the result is always a
        # list (never None) and only holds episodes with a truthy 'path'
        episodes = traverse_obj(
            season_item, ('episodes', 'items', lambda _, v: v['path']))

        entries = [{
            '_type': 'url',
            'url': f'https://www.dr.dk/drtv{episode["path"]}',
            'ie_key': DRTVIE.ie_key(),
            'title': episode.get('title'),
            'alt_title': episode.get('contextualTitle'),
            'episode': episode.get('episodeName'),
            'description': episode.get('shortDescription'),
            'series': series_title,
            'season_number': season_number,
            'episode_number': episode.get('episodeNumber'),
        } for episode in episodes]

        return {
            '_type': 'playlist',
            'id': season_id,
            'display_id': display_id,
            'title': series_title,
            'alt_title': season_item.get('contextualTitle'),
            'series': series_title,
            'entries': entries,
            'season_number': season_number,
        }
361 | ||
362 | ||
class DRTVSeriesIE(InfoExtractor):
    # Playlist extractor for a whole DRTV series (/drtv/serie/<slug>_<id>)
    IE_NAME = 'drtv:series'
    _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/serie/(?P<display_id>[\w-]+)_(?P<id>\d+)'
    _GEO_COUNTRIES = ['DK']
    _TESTS = [{
        'url': 'https://www.dr.dk/drtv/serie/frank-and-kastaniegaarden_6954',
        'info_dict': {
            'id': '6954',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
            'alt_title': '',
        },
        'playlist_mincount': 15,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are url results handed to DRTVSeasonIE.

        Seasons without a usable 'path' are skipped (previously they produced
        a '.../drtvNone' URL), and a page without a season list yields an
        empty playlist instead of raising.
        """
        display_id, series_id = self._match_valid_url(url).group('display_id', 'id')
        data = self._download_json(SERIES_API % f'/serie/{display_id}_{series_id}', display_id)

        # Everything of interest hangs off the first entry's item; resolve it once
        series_item = traverse_obj(data, ('entries', 0, 'item', {dict})) or {}
        series_title = series_item.get('title')
        # NOTE(review): this is the page item's seasonNumber and is attached
        # unchanged to every season entry, as before - confirm whether each
        # season object carries its own number that should be used instead
        season_number = series_item.get('seasonNumber')

        # The filter makes this a branching traversal: the result is always a
        # list (never None) and only holds seasons with a truthy 'path'
        seasons = traverse_obj(
            series_item, ('show', 'seasons', 'items', lambda _, v: v['path']))

        entries = [{
            '_type': 'url',
            'url': f'https://www.dr.dk/drtv{season["path"]}',
            'ie_key': DRTVSeasonIE.ie_key(),
            'title': season.get('title'),
            'alt_title': season.get('contextualTitle'),
            'series': series_title,
            'season_number': season_number,
        } for season in seasons]

        return {
            '_type': 'playlist',
            'id': series_id,
            'display_id': display_id,
            'title': series_title,
            'alt_title': series_item.get('contextualTitle'),
            'series': series_title,
            'entries': entries,
        }