]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/drtv.py
[ie/bilibili] Add support for series, favorites and watch later (#7518)
[yt-dlp.git] / yt_dlp / extractor / drtv.py
CommitLineData
a2d821d7
S
1import binascii
2import hashlib
3import re
4
1335c3ac 5from .common import InfoExtractor
1d3586d0 6from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
a2d821d7 7from ..compat import compat_urllib_parse_unquote
1335c3ac
S
8from ..utils import (
9 ExtractorError,
6066d03d 10 float_or_none,
edfc7725 11 int_or_none,
6066d03d 12 mimetype2ext,
a2d821d7 13 str_or_none,
ab4cbeff 14 traverse_obj,
a2d821d7 15 unified_timestamp,
2c15db82 16 update_url_query,
a2d821d7 17 url_or_none,
1335c3ac 18)
f2b8db57 19
ab4cbeff
FNJS
# Page-API URL template; the single ``%s`` placeholder is filled with a site
# path such as ``/episode/<slug>``, ``/saeson/<slug>`` or ``/serie/<slug>``.
SERIES_API = 'https://production-cdn.dr-massive.com/api/page?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s'
21
22
class DRTVIE(InfoExtractor):
    """Extractor for single DR programmes.

    Matches dr.dk TV / news / radio pages as well as DRTV ``se``, ``episode``
    and ``program`` pages (on dr.dk and dr-massive.com). Media information is
    read from the ``mu-online`` programcard API.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
                            (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
                        )
                        (?P<id>[\da-z_-]+)
                    '''
    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['DK']
    IE_NAME = 'drtv'
    _TESTS = [{
        'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
        'md5': '25e659cccc9a2ed956110a299fdf5983',
        'info_dict': {
            'id': 'klassen-darlig-taber-10',
            'ext': 'mp4',
            'title': 'Klassen - Dårlig taber (10)',
            'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
            'timestamp': 1539085800,
            'upload_date': '20181009',
            'duration': 606.84,
            'series': 'Klassen',
            'season': 'Klassen I',
            'season_number': 1,
            'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
            'episode': 'Episode 10',
            'episode_number': 10,
            'release_year': 2016,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
        'skip': 'this video has been removed',
    }, {
        # embed
        'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
        'info_dict': {
            'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
            'ext': 'mp4',
            'title': 'christiania pusher street ryddes drdkrjpo',
            'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
            'timestamp': 1472800279,
            'upload_date': '20160902',
            'duration': 131.4,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        # with SignLanguage formats
        'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
        'info_dict': {
            'id': '00831690010',
            'ext': 'mp4',
            'title': 'Historien om Danmark: Stenalder',
            'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
            'timestamp': 1546628400,
            'upload_date': '20190104',
            'duration': 3504.619,
            'formats': 'mincount:20',
            'release_year': 2017,
            'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
            'season_number': 1,
            'season': 'Historien om Danmark',
            'series': 'Historien om Danmark',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
        'only_matching': True,
    }, {
        'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
        'info_dict': {
            'id': '00951930010',
            'ext': 'mp4',
            'title': 'Bonderøven 2019 (1:8)',
            'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
            'timestamp': 1654856100,
            'upload_date': '20220610',
            'duration': 2576.6,
            'season': 'Bonderøven 2019',
            'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
            'release_year': 2019,
            'season_number': 2019,
            'series': 'Frank & Kastaniegaarden',
            'episode_number': 1,
            'episode': 'Episode 1',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
        'only_matching': True,
    }, {
        'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
        'only_matching': True,
    }, {
        'url': 'https://www.dr.dk/drtv/program/jagten_220924',
        'only_matching': True,
    }, {
        'url': 'https://www.dr.dk/lyd/p4aarhus/regionale-nyheder-ar4/regionale-nyheder-2022-05-05-12-30-3',
        'info_dict': {
            'id': 'urn:dr:mu:programcard:6265cb2571401424d0360113',
            'title': "Regionale nyheder",
            'ext': 'mp4',
            'duration': 120.043,
            'series': 'P4 Østjylland regionale nyheder',
            'timestamp': 1651746600,
            'season': 'Regionale nyheder',
            'release_year': 0,
            'season_id': 'urn:dr:mu:bundle:61c26889539f0201586b73c5',
            'description': '',
            'upload_date': '20220505',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'this video has been removed',
    }, {
        'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
        'info_dict': {
            'ext': 'mp4',
            'id': '14802310112',
            'timestamp': 1678786200,
            'duration': 120.043,
            'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
            'series': 'P4 København regionale nyheder',
            'upload_date': '20230314',
            'release_year': 0,
            'description': 'Hør seneste regionale nyheder fra P4 København.',
            'season': 'Regionale nyheder',
            'title': 'Regionale nyheder',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the programme at *url*.

        Raises ExtractorError when the page reports the programme as no
        longer available or when no video id can be determined, and reports
        a geo restriction when no formats were found for an asset flagged
        ``RestrictedToDenmark``.
        """
        raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio')

        webpage = self._download_webpage(url, raw_video_id)

        if '>Programmet er ikke længere tilgængeligt' in webpage:
            raise ExtractorError(
                'Video %s is not available' % raw_video_id, expected=True)

        video_id = self._search_regex(
            (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
             r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
            webpage, 'video id', default=None)

        if not video_id:
            # Fall back to a (possibly percent-encoded) programcard URN
            # anywhere in the page
            video_id = self._search_regex(
                r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
                webpage, 'urn', default=None)
            if video_id:
                video_id = compat_urllib_parse_unquote(video_id)

        _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
        query = {'expanded': 'true'}

        if video_id:
            programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
        else:
            # No id in the markup: look the programcard up by production number
            programcard_url = _PROGRAMCARD_BASE
            if is_radio_url:
                # traverse_obj instead of chained subscripts, so a missing key
                # leads to the ExtractorError below rather than a bare KeyError
                video_id = traverse_obj(
                    self._search_nextjs_data(webpage, raw_video_id),
                    ('props', 'pageProps', 'episode', 'productionNumber'))
            else:
                json_data = self._search_json(
                    r'window\.__data\s*=', webpage, 'data', raw_video_id)
                video_id = traverse_obj(json_data, (
                    'cache', 'page', ..., (None, ('entries', 0)), 'item', 'customId',
                    {lambda x: x.split(':')[-1]}), get_all=False)
            if not video_id:
                raise ExtractorError('Unable to extract video id')
            query['productionnumber'] = video_id

        data = self._download_json(
            programcard_url, video_id, 'Downloading video JSON', query=query)

        # Slugs ending in "_<digits>" additionally get a (non-fatal) page-API
        # lookup, used below for the episode title and number
        supplementary_data = {}
        if re.search(r'_\d+$', raw_video_id):
            supplementary_data = self._download_json(
                SERIES_API % f'/episode/{raw_video_id}', raw_video_id, fatal=False) or {}

        title = str_or_none(data.get('Title')) or re.sub(
            r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
            self._og_search_title(webpage))
        description = self._og_search_description(
            webpage, default=None) or data.get('Description')

        timestamp = unified_timestamp(
            data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))

        thumbnail = None
        duration = None

        restricted_to_denmark = False

        formats = []
        subtitles = {}

        # Collect the primary asset followed by any well-formed secondary assets
        assets = []
        primary_asset = data.get('PrimaryAsset')
        if isinstance(primary_asset, dict):
            assets.append(primary_asset)
        secondary_assets = data.get('SecondaryAssets')
        if isinstance(secondary_assets, list):
            assets.extend(
                secondary_asset for secondary_asset in secondary_assets
                if isinstance(secondary_asset, dict))

        def hex_to_bytes(hex_string):
            return binascii.a2b_hex(hex_string.encode('ascii'))

        def decrypt_uri(e):
            # Layout of `e` as read here: chars 2-9 hold the hex-encoded length n,
            # the next n chars are the hex ciphertext and the remainder is the hex IV
            n = int(e[2:10], 16)
            a = e[10 + n:]
            data = hex_to_bytes(e[10:10 + n])
            key = hashlib.sha256(('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()
            iv = hex_to_bytes(a)
            decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(data, key, iv))
            # Drop the query string of the decrypted URL
            return decrypted.decode('utf-8').split('?')[0]

        for asset in assets:
            kind = asset.get('Kind')
            if kind == 'Image':
                thumbnail = url_or_none(asset.get('Uri'))
            elif kind in ('VideoResource', 'AudioResource'):
                duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
                restricted_to_denmark = asset.get('RestrictedToDenmark')
                asset_target = asset.get('Target')
                # `or []` also covers an explicit null "Links" value
                for link in asset.get('Links') or []:
                    uri = link.get('Uri')
                    if not uri:
                        encrypted_uri = link.get('EncryptedUri')
                        if not encrypted_uri:
                            continue
                        try:
                            uri = decrypt_uri(encrypted_uri)
                        except Exception:
                            # Best effort: a link that cannot be decrypted is skipped
                            self.report_warning(
                                'Unable to decrypt EncryptedUri', video_id)
                            continue
                    uri = url_or_none(uri)
                    if not uri:
                        continue
                    target = link.get('Target')
                    format_id = target or ''
                    if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
                        # Rank these variants below the default asset
                        preference = -1
                        format_id += '-%s' % asset_target
                    elif asset_target == 'Default':
                        preference = 1
                    else:
                        preference = None
                    if target == 'HDS':
                        f4m_formats = self._extract_f4m_formats(
                            uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
                            video_id, preference, f4m_id=format_id, fatal=False)
                        if kind == 'AudioResource':
                            for f in f4m_formats:
                                f['vcodec'] = 'none'
                        formats.extend(f4m_formats)
                    elif target == 'HLS':
                        fmts, subs = self._extract_m3u8_formats_and_subtitles(
                            uri, video_id, 'mp4', entry_protocol='m3u8_native',
                            quality=preference, m3u8_id=format_id, fatal=False)
                        formats.extend(fmts)
                        self._merge_subtitles(subs, target=subtitles)
                    else:
                        bitrate = link.get('Bitrate')
                        if bitrate:
                            format_id += '-%s' % bitrate
                        formats.append({
                            'url': uri,
                            'format_id': format_id,
                            'tbr': int_or_none(bitrate),
                            'ext': link.get('FileFormat'),
                            'vcodec': 'none' if kind == 'AudioResource' else None,
                            'quality': preference,
                        })
                # Both key spellings are checked
                subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
                if isinstance(subtitles_list, list):
                    LANGS = {
                        'Danish': 'da',
                    }
                    for subs in subtitles_list:
                        if not isinstance(subs, dict):
                            continue
                        sub_uri = url_or_none(subs.get('Uri'))
                        if not sub_uri:
                            continue
                        lang = subs.get('Language') or 'da'
                        subtitles.setdefault(LANGS.get(lang, lang), []).append({
                            'url': sub_uri,
                            'ext': mimetype2ext(subs.get('MimeType')) or 'vtt',
                        })

        if not formats and restricted_to_denmark:
            self.raise_geo_restricted(
                'Unfortunately, DR is not allowed to show this program outside Denmark.',
                countries=self._GEO_COUNTRIES)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
            'series': str_or_none(data.get('SeriesTitle')),
            'season': str_or_none(data.get('SeasonTitle')),
            'season_number': int_or_none(data.get('SeasonNumber')),
            'season_id': str_or_none(data.get('SeasonUrn')),
            # Supplementary page-API values win over the programcard ones
            'episode': traverse_obj(supplementary_data, ('entries', 0, 'item', 'contextualTitle')) or str_or_none(data.get('EpisodeTitle')),
            'episode_number': traverse_obj(supplementary_data, ('entries', 0, 'item', 'episodeNumber')) or int_or_none(data.get('EpisodeNumber')),
            'release_year': int_or_none(data.get('ProductionYear')),
        }
2c15db82
RA
347
348
class DRTVLiveIE(InfoExtractor):
    """Extractor for DR live TV channel pages (``dr.dk/tv/live/<channel>``)."""

    IE_NAME = 'drtv:live'
    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
    _GEO_COUNTRIES = ['DK']
    _TEST = {
        'url': 'https://www.dr.dk/tv/live/dr1',
        'info_dict': {
            'id': 'dr1',
            'ext': 'mp4',
            'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a live info dict built from the channel API response.

        Every stream of every quality on every streaming server is expanded
        into formats; only the ``HLS`` and ``HDS`` link types are handled.
        """
        channel_id = self._match_id(url)
        channel_data = self._download_json(
            'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
            channel_id)
        title = channel_data['Title']

        formats = []
        # `or []` guards against keys that are present but null
        for streaming_server in channel_data.get('StreamingServers') or []:
            server = streaming_server.get('Server')
            if not server:
                continue
            link_type = streaming_server.get('LinkType')
            for quality in streaming_server.get('Qualities') or []:
                for stream in quality.get('Streams') or []:
                    stream_path = stream.get('Stream')
                    if not stream_path:
                        continue
                    base_url = '%s/%s' % (server, stream_path)
                    if link_type == 'HLS':
                        formats.extend(self._extract_m3u8_formats(
                            update_url_query(base_url, {'b': ''}), channel_id, 'mp4',
                            m3u8_id=link_type, fatal=False, live=True))
                    elif link_type == 'HDS':
                        formats.extend(self._extract_f4m_formats(
                            update_url_query(base_url, {'hdcore': '3.7.0'}),
                            channel_id, f4m_id=link_type, fatal=False))

        return {
            'id': channel_id,
            'title': title,
            'thumbnail': channel_data.get('PrimaryImageUri'),
            'formats': formats,
            'is_live': True,
        }
ab4cbeff
FNJS
402
403
class DRTVSeasonIE(InfoExtractor):
    """Playlist extractor for DRTV season pages (``/drtv/saeson/<slug>_<id>``)."""

    IE_NAME = 'drtv:season'
    _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/saeson/(?P<display_id>[\w-]+)_(?P<id>\d+)'
    _GEO_COUNTRIES = ['DK']
    _TESTS = [{
        'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_9008',
        'info_dict': {
            'id': '9008',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761',
        'info_dict': {
            'id': '8761',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
        },
        'playlist_mincount': 19,
    }]

    def _real_extract(self, url):
        """Return a playlist with one url-type entry per episode of the season.

        Each entry is delegated to DRTVIE. Episodes without a ``path`` are
        skipped, and a response without an episode list gives an empty playlist.
        """
        display_id, season_id = self._match_valid_url(url).group('display_id', 'id')
        data = self._download_json(SERIES_API % f'/saeson/{display_id}_{season_id}', display_id)

        # The same item-level values are shared by the playlist and all entries
        series_title = traverse_obj(data, ('entries', 0, 'item', 'title'))
        season_number = traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))
        # traverse_obj gives None when the path is missing; iterate an empty list instead
        episodes = traverse_obj(data, ('entries', 0, 'item', 'episodes', 'items')) or []

        entries = [{
            '_type': 'url',
            'url': f'https://www.dr.dk/drtv{episode["path"]}',
            'ie_key': DRTVIE.ie_key(),
            'title': episode.get('title'),
            'episode': episode.get('episodeName'),
            'description': episode.get('shortDescription'),
            'series': series_title,
            'season_number': season_number,
            'episode_number': episode.get('episodeNumber'),
        } for episode in episodes if isinstance(episode, dict) and episode.get('path')]

        return {
            '_type': 'playlist',
            'id': season_id,
            'display_id': display_id,
            'title': series_title,
            'series': series_title,
            'entries': entries,
            'season_number': season_number,
        }
453
454
class DRTVSeriesIE(InfoExtractor):
    """Playlist extractor for DRTV series pages (``/drtv/serie/<slug>_<id>``)."""

    IE_NAME = 'drtv:series'
    _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/serie/(?P<display_id>[\w-]+)_(?P<id>\d+)'
    _GEO_COUNTRIES = ['DK']
    _TESTS = [{
        'url': 'https://www.dr.dk/drtv/serie/frank-and-kastaniegaarden_6954',
        'info_dict': {
            'id': '6954',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
        },
        'playlist_mincount': 15,
    }]

    def _real_extract(self, url):
        """Return a playlist with one url-type entry per season of the series.

        Each entry is delegated to DRTVSeasonIE. Seasons without a ``path``
        are skipped, and a response without a season list gives an empty
        playlist.
        """
        display_id, series_id = self._match_valid_url(url).group('display_id', 'id')
        data = self._download_json(SERIES_API % f'/serie/{display_id}_{series_id}', display_id)

        series_title = traverse_obj(data, ('entries', 0, 'item', 'title'))
        # NOTE(review): this is read from the series-level item, so every entry
        # gets the same value - confirm whether a per-season number is intended
        season_number = traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))
        # traverse_obj gives None when the path is missing; iterate an empty list instead
        seasons = traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items')) or []

        entries = [{
            '_type': 'url',
            'url': f'https://www.dr.dk/drtv{season["path"]}',
            'ie_key': DRTVSeasonIE.ie_key(),
            'title': season.get('title'),
            'series': series_title,
            'season_number': season_number,
        } for season in seasons if isinstance(season, dict) and season.get('path')]

        return {
            '_type': 'playlist',
            'id': series_id,
            'display_id': display_id,
            'title': series_title,
            'series': series_title,
            'entries': entries,
        }