]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/svt.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / svt.py
CommitLineData
2301b5c1 1import json
df5ae3eb
S
2import re
3
1309b396
PH
4from .common import InfoExtractor
5from ..utils import (
6 determine_ext,
e4f90ea0 7 dict_get,
23bdae09 8 int_or_none,
2301b5c1 9 traverse_obj,
23bdae09 10 try_get,
2301b5c1 11 unified_timestamp,
1309b396
PH
12)
13
14
class SVTBaseIE(InfoExtractor):
    # Country list handed to raise_geo_restricted() when no formats are found
    # and the API flags the video as blocked outside Sweden.
    _GEO_COUNTRIES = ['SE']

    def _extract_video(self, video_info, video_id):
        """Build an info dict from an SVT video metadata object.

        @param video_info   Dict describing one video. The keys read here are
                            'videoReferences', 'rights', 'subtitles' /
                            'subtitleReferences', 'live' / 'simulcast' and the
                            descriptive fields returned below.
        @param video_id     ID used for the result and for log messages.
        @returns            Dict with 'id', 'title', 'formats', 'subtitles',
                            'duration', 'timestamp', 'age_limit', 'series',
                            'season_number', 'episode', 'episode_number' and
                            'is_live'.

        Malformed entries (non-dict references, references without a URL) are
        skipped so that one bad entry does not abort the whole extraction.
        """
        is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
        m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
        formats = []
        subtitles = {}
        for vr in video_info.get('videoReferences') or []:
            if not isinstance(vr, dict):
                continue
            vurl = vr.get('url')
            if not vurl:
                # Nothing to download from this reference
                continue
            player_type = vr.get('playerType') or vr.get('format')
            ext = determine_ext(vurl)
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    vurl, video_id,
                    ext='mp4', entry_protocol=m3u8_protocol,
                    m3u8_id=player_type, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    vurl + '?hdcore=3.3.0', video_id,
                    f4m_id=player_type, fatal=False))
            elif ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    vurl, video_id, mpd_id=player_type, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                # Unknown container: expose the URL as a direct format
                formats.append({
                    'format_id': player_type,
                    'url': vurl,
                })
        rights = try_get(video_info, lambda x: x['rights'], dict) or {}
        if not formats and rights.get('geoBlockedSweden'):
            self.raise_geo_restricted(
                'This video is only available in Sweden',
                countries=self._GEO_COUNTRIES, metadata_available=True)

        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
        if isinstance(subtitle_references, list):
            for sr in subtitle_references:
                if not isinstance(sr, dict):
                    continue
                subtitle_url = sr.get('url')
                if not subtitle_url:
                    continue
                sub = {
                    'url': subtitle_url,
                }
                if determine_ext(subtitle_url) == 'm3u8':
                    # XXX: no way of testing, is it ever hit?
                    sub['ext'] = 'vtt'
                # `or` (not a .get() default) so that an explicit null language
                # also falls back to Swedish instead of becoming a None key
                subtitle_lang = sr.get('language') or 'sv'
                subtitles.setdefault(subtitle_lang, []).append(sub)

        title = video_info.get('title')

        series = video_info.get('programTitle')
        season_number = int_or_none(video_info.get('season'))
        episode = video_info.get('episodeTitle')
        episode_number = int_or_none(video_info.get('episodeNumber'))

        timestamp = unified_timestamp(rights.get('validFrom'))
        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
        # age_limit stays None when neither flag is present; an explicit False
        # must be kept (skip_false_values=False) so it maps to 0
        age_limit = None
        adult = dict_get(
            video_info, ('inappropriateForChildren', 'blockedForChildren'),
            skip_false_values=False)
        if adult is not None:
            age_limit = 18 if adult else 0

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'duration': duration,
            'timestamp': timestamp,
            'age_limit': age_limit,
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'is_live': is_live,
        }
79998cd5
S
98
99
class SVTIE(SVTBaseIE):
    # Extractor for svt.se "wd" widget URLs carrying a widgetId and an articleId
    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _EMBED_REGEX = [rf'(?:<iframe src|href)="(?P<url>{_VALID_URL}[^"]*)"']
    _TEST = {
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
        'info_dict': {
            'id': '2900353',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }

    def _real_extract(self, url):
        """Download the widget JSON and return the info dict of its video.

        The title is taken from the widget's 'context' object and replaces
        whatever title the video metadata itself carried.
        """
        widget_id, article_id = self._match_valid_url(url).group('widget_id', 'id')

        widget_json = self._download_json(
            f'http://www.svt.se/wd?widgetId={widget_id}&articleId={article_id}&format=json&type=embed&output=json',
            article_id)

        result = self._extract_video(widget_json['video'], article_id)
        result['title'] = widget_json['context']['title']
        return result
127
79998cd5 128
1236ac6b
S
class SVTPlayBaseIE(SVTBaseIE):
    # Matches a `root["__svtplay"] = {...};` style assignment in a page's inline
    # script (either quote style, any number of leading underscores) and
    # captures the object literal in the named group `json`.
    _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
131
132
class SVTPlayIE(SVTPlayBaseIE):
    IE_DESC = 'SVT Play and Öppet arkiv'
    # Either an internal `svt:<id>` / barnplay URL (group `svt_id`), or a
    # svtplay.se / oppetarkiv.se page URL (group `id`) optionally carrying the
    # real video ID in a `modalId=` / `id=` query parameter (group `modal_id`).
    _VALID_URL = r'''(?x)
                    (?:
                        (?:
                            svt:|
                            https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
                        )
                        (?P<svt_id>[^/?#&]+)|
                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
                        (?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
                    )
                    '''
    _TESTS = [{
        'url': 'https://www.svtplay.se/video/30479064',
        'md5': '2382036fd6f8c994856c323fe51c426e',
        'info_dict': {
            'id': '8zVbDPA',
            'ext': 'mp4',
            'title': 'Designdrömmar i Stenungsund',
            'timestamp': 1615770000,
            'upload_date': '20210315',
            'duration': 3519,
            'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
            'age_limit': 0,
            'subtitles': {
                'sv': [{
                    'ext': 'vtt',
                }],
            },
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'Episode is no longer available',
    }, {
        'url': 'https://www.svtplay.se/video/emBxBQj',
        'md5': '2382036fd6f8c994856c323fe51c426e',
        'info_dict': {
            'id': 'eyBd9aj',
            'ext': 'mp4',
            'title': '1. Farlig kryssning',
            'timestamp': 1491019200,
            'upload_date': '20170401',
            'duration': 2566,
            'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
            'age_limit': 0,
            'episode': '1. Farlig kryssning',
            'series': 'Rederiet',
            'subtitles': {
                'sv': 'count:3',
            },
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.svtplay.se/video/jz2rYz7/anders-hansen-moter/james-fallon?info=visa',
        'info_dict': {
            'id': 'jvXAGVb',
            'ext': 'mp4',
            'title': 'James Fallon',
            'timestamp': 1673917200,
            'upload_date': '20230117',
            'duration': 1081,
            'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
            'age_limit': 0,
            'episode': 'James Fallon',
            'series': 'Anders Hansen möter...',
        },
        'params': {
            'skip_download': 'dash',
        },
    }, {
        'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
        'only_matching': True,
    }, {
        'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa',
        'only_matching': True,
    }, {
        # geo restricted to Sweden
        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
        'only_matching': True,
    }, {
        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
        'only_matching': True,
    }, {
        'url': 'https://www.svtplay.se/kanaler/svt1',
        'only_matching': True,
    }, {
        'url': 'svt:1376446-003A',
        'only_matching': True,
    }, {
        'url': 'svt:14278044',
        'only_matching': True,
    }, {
        'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
        'only_matching': True,
    }, {
        'url': 'svt:eWv5MLX',
        'only_matching': True,
    }]

    def _extract_by_video_id(self, video_id, webpage=None):
        """Fetch metadata for `video_id` from the videoplayer API.

        @param video_id     SVT video ID to look up.
        @param webpage      Optional HTML of the page the ID was found on; used
                            only as a last-resort source for the title.
        @returns            Info dict from _extract_video() with a guaranteed
                            non-empty 'title'.

        Title fallback order: API title, episode, series, og:title of
        `webpage` (with any trailing " | ..." suffix removed), `video_id`.
        """
        data = self._download_json(
            f'https://api.svt.se/videoplayer-api/video/{video_id}',
            video_id, headers=self.geo_verification_headers())
        info_dict = self._extract_video(data, video_id)
        if not info_dict.get('title'):
            title = dict_get(info_dict, ('episode', 'series'))
            if not title and webpage:
                # default=None: a missing og:title must fall through to the
                # video_id fallback instead of reaching re.sub() as None
                og_title = self._og_search_title(webpage, default=None)
                if og_title:
                    title = re.sub(r'\s*\|\s*.+?$', '', og_title)
            info_dict['title'] = title or video_id
        return info_dict

    def _real_extract(self, url):
        """Resolve `url` to an SVT video ID and extract it.

        If the URL itself carries the ID (svt_id / modal_id groups) no page is
        downloaded. Otherwise the ID is looked for, in order, in the embedded
        svtplay JSON, the Next.js data and the raw HTML.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        svt_id = mobj.group('svt_id') or mobj.group('modal_id')

        if svt_id:
            return self._extract_by_video_id(svt_id)

        webpage = self._download_webpage(url, video_id)

        data = self._parse_json(
            self._search_regex(
                self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
                group='json'),
            video_id, fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)

        if data:
            video_info = try_get(
                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
                dict)
            if video_info:
                info_dict = self._extract_video(video_info, video_id)
                # The page-level title is preferred, but its absence must not
                # discard an otherwise successful extraction
                page_title = traverse_obj(
                    data, ('context', 'dispatcher', 'stores', 'MetaStore', 'title', {str}))
                info_dict.update({
                    'title': page_title or info_dict.get('title'),
                    'thumbnail': thumbnail,
                })
                return info_dict

            svt_id = try_get(
                data, lambda x: x['statistics']['dataLake']['content']['id'],
                str)

        if not svt_id:
            nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
            svt_id = traverse_obj(nextjs_data, (
                'props', 'urqlState', ..., 'data', {json.loads}, 'detailsPageByPath',
                'video', 'svtId', {str}), get_all=False)

        if not svt_id:
            svt_id = self._search_regex(
                (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
                 r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/[\w-]+/[^"\']*\b(?:modalId|id)=([\w-]+)'),
                webpage, 'video id')

        info_dict = self._extract_by_video_id(svt_id, webpage)
        info_dict['thumbnail'] = thumbnail

        return info_dict
fd97fa7b
MW
301
302
class SVTSeriesIE(SVTPlayBaseIE):
    # `id` is the series slug; an optional `tab=` query parameter selects one season
    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
    _TESTS = [{
        'url': 'https://www.svtplay.se/rederiet',
        'info_dict': {
            'id': '14445680',
            'title': 'Rederiet',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 318,
    }, {
        'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
        'info_dict': {
            'id': 'season-2-14445680',
            'title': 'Rederiet - Säsong 2',
            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
        },
        'playlist_mincount': 12,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to SVTIE / SVTPlayIE for URLs that either of them accepts."""
        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        """Return a playlist of `svt:<id>` entries for a series or one season.

        The series is looked up by slug through the contento GraphQL API.
        When the URL names a season, only that season's episodes are listed
        and the season ID becomes the playlist ID.
        """
        series_slug, season_id = self._match_valid_url(url).groups()

        series = self._download_json(
            'https://api.svt.se/contento/graphql', series_slug,
            'Downloading series page', query={
                # json.dumps() renders the slug as a quoted, escaped string
                # literal so that quotes or backslashes in the URL cannot break
                # out of it; ordinary slugs produce the same query as before
                'query': '''{
  listablesBySlug(slugs: [%s]) {
    associatedContent(include: [productionPeriod, season]) {
      items {
        item {
          ... on Episode {
            videoSvtId
          }
        }
      }
      id
      name
    }
    id
    longDescription
    name
    shortDescription
  }
}''' % json.dumps(series_slug, ensure_ascii=False),  # noqa: UP031
            })['data']['listablesBySlug'][0]

        season_name = None

        entries = []
        for season in series.get('associatedContent') or []:
            if not isinstance(season, dict):
                continue
            if season_id:
                if season.get('id') != season_id:
                    continue
                season_name = season.get('name')
            items = season.get('items')
            if not isinstance(items, list):
                continue
            for item in items:
                # traverse_obj tolerates non-dict items and non-string IDs
                content_id = traverse_obj(item, ('item', 'videoSvtId', {str}))
                if not content_id:
                    continue
                entries.append(self.url_result(
                    'svt:' + content_id, SVTPlayIE.ie_key(), content_id))

        title = series.get('name')
        season_name = season_name or season_id

        if title and season_name:
            title = f'{title} - {season_name}'
        elif season_id:
            title = season_id

        return self.playlist_result(
            entries, season_id or series.get('id'), title,
            dict_get(series, ('longDescription', 'shortDescription')))
7b393f9c
S
386
387
class SVTPageIE(SVTBaseIE):
    # Generic svt.se article pages; `id` is the last path component
    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
        'info_dict': {
            'title': 'Viktor, 18, förlorade armar och ben i sepsis – vill återuppta karaten och bli svetsare',
            'id': 'viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.svt.se/nyheter/lokalt/skane/forsvarsmakten-om-trafikkaoset-pa-e22-kunde-inte-varit-dar-snabbare',
        'info_dict': {
            'id': 'jXvk42E',
            'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
            'ext': 'mp4',
            'duration': 80,
            'age_limit': 0,
            'timestamp': 1704370009,
            'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
            'series': 'Lokala Nyheter Skåne',
            'upload_date': '20240104',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media',
        'info_dict': {
            'title': '2023 tungt år för svensk media',
            'id': 'ewqAZv4',
            'ext': 'mp4',
            'duration': 3074,
            'age_limit': 0,
            'series': '',
            'timestamp': 1702980479,
            'upload_date': '20231219',
            'episode': 'Mediestudier',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
        'info_dict': {
            'id': '25298267',
            'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
        },
        'playlist_count': 4,
        'skip': 'Video is gone',
    }, {
        'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
        'info_dict': {
            'id': '24243746',
            'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
        },
        'playlist_count': 2,
        'skip': 'Video is gone',
    }, {
        # only programTitle
        'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
        'info_dict': {
            'id': '8439V2K',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
        'skip': 'Video is gone',
    }, {
        'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
        'only_matching': True,
    }, {
        'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to SVTIE / SVTPlayIE for URLs that either of them accepts."""
        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        """Return a playlist of every video referenced by an svt.se article.

        Video IDs are read from the page's embedded urqlState JSON (the top
        media item first, then videos in the article body). Every entry is
        given the article's og:title as its title.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)
        title = self._og_search_title(webpage)

        urql_state = self._search_json(
            r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)

        data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}

        def entries():
            # dict.fromkeys() removes duplicate IDs while keeping page order;
            # a set() would make the playlist order differ between runs
            for video_id in dict.fromkeys(traverse_obj(data, (
                'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str},
            ))):
                info = self._extract_video(
                    self._download_json(f'https://api.svt.se/video/{video_id}', video_id), video_id)
                info['title'] = title
                yield info

        return self.playlist_result(entries(), display_id, title)