]>
Commit | Line | Data |
---|---|---|
28f12728 | 1 | # coding: utf-8 |
1309b396 PH |
2 | from __future__ import unicode_literals |
3 | ||
df5ae3eb S |
4 | import re |
5 | ||
1309b396 | 6 | from .common import InfoExtractor |
19a107f2 AG |
7 | from ..compat import ( |
8 | compat_parse_qs, | |
9 | compat_urllib_parse_urlparse, | |
10 | ) | |
1309b396 PH |
11 | from ..utils import ( |
12 | determine_ext, | |
e4f90ea0 | 13 | dict_get, |
23bdae09 | 14 | int_or_none, |
19a107f2 | 15 | orderedSet, |
7b393f9c | 16 | strip_or_none, |
23bdae09 | 17 | try_get, |
19a107f2 AG |
18 | urljoin, |
19 | compat_str, | |
1309b396 PH |
20 | ) |
21 | ||
22 | ||
class SVTBaseIE(InfoExtractor):
    """Shared extraction logic for the SVT extractors in this module."""

    _GEO_COUNTRIES = ['SE']

    def _extract_video(self, video_info, video_id):
        """Build an info dict from an SVT video description.

        video_info is the decoded JSON object describing a single video; it
        must contain a 'videoReferences' list whose items each carry a
        'url'. video_id becomes the 'id' of the result and is handed to the
        manifest extraction helpers.

        Returns a dict with the keys 'id', 'title', 'formats', 'subtitles',
        'duration', 'age_limit', 'series', 'season_number', 'episode',
        'episode_number' and 'is_live'.

        Reports a geo restriction (via raise_geo_restricted) when no format
        was found and the video is flagged 'geoBlockedSweden'.
        """
        is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
        # Live streams are requested with the 'm3u8' entry protocol,
        # everything else with 'm3u8_native'.
        m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'

        formats = []
        for vr in video_info['videoReferences']:
            player_type = vr.get('playerType') or vr.get('format')
            vurl = vr['url']
            ext = determine_ext(vurl)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    vurl, video_id,
                    ext='mp4', entry_protocol=m3u8_protocol,
                    m3u8_id=player_type, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    vurl + '?hdcore=3.3.0', video_id,
                    f4m_id=player_type, fatal=False))
            elif ext == 'mpd':
                # Only the 'dashhbbtv' DASH manifest is extracted; other
                # mpd references are skipped.
                if player_type == 'dashhbbtv':
                    formats.extend(self._extract_mpd_formats(
                        vurl, video_id, mpd_id=player_type, fatal=False))
            else:
                formats.append({
                    'format_id': player_type,
                    'url': vurl,
                })

        # 'rights' may be absent or explicitly null in the JSON; a plain
        # .get('rights', {}) would return None in the latter case and break
        # the chained lookup, so normalise to a dict first.
        rights = try_get(video_info, lambda x: x['rights'], dict) or {}
        if not formats and rights.get('geoBlockedSweden'):
            self.raise_geo_restricted(
                'This video is only available in Sweden',
                countries=self._GEO_COUNTRIES)
        self._sort_formats(formats)

        subtitles = {}
        subtitle_references = dict_get(
            video_info, ('subtitles', 'subtitleReferences'))
        if isinstance(subtitle_references, list):
            for sr in subtitle_references:
                subtitle_url = sr.get('url')
                # Subtitles without an explicit language are filed under 'sv'
                subtitle_lang = sr.get('language', 'sv')
                if subtitle_url:
                    if determine_ext(subtitle_url) == 'm3u8':
                        # TODO(yan12125): handle WebVTT in m3u8 manifests
                        continue

                    subtitles.setdefault(subtitle_lang, []).append(
                        {'url': subtitle_url})

        title = video_info.get('title')

        series = video_info.get('programTitle')
        season_number = int_or_none(video_info.get('season'))
        episode = video_info.get('episodeTitle')
        episode_number = int_or_none(video_info.get('episodeNumber'))

        duration = int_or_none(
            dict_get(video_info, ('materialLength', 'contentDuration')))

        # age_limit stays None when neither flag is present at all; an
        # explicit false flag yields 0, a true flag yields 18.
        age_limit = None
        adult = dict_get(
            video_info, ('inappropriateForChildren', 'blockedForChildren'),
            skip_false_values=False)
        if adult is not None:
            age_limit = 18 if adult else 0

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'duration': duration,
            'age_limit': age_limit,
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'is_live': is_live,
        }
79998cd5 S |
99 | |
100 | ||
class SVTIE(SVTBaseIE):
    """Extractor for svt.se widget embeds (``/wd?widgetId=...&articleId=...``)."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
        'info_dict': {
            'id': '2900353',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first SVT widget URL embedded in webpage, if any.

        Looks for an ``<iframe src="...">`` or ``href="..."`` attribute whose
        value matches _VALID_URL. Returns None when nothing matches.
        """
        mobj = re.search(
            r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the widget's JSON description and extract its video."""
        mobj = re.match(self._VALID_URL, url)
        widget_id = mobj.group('widget_id')
        article_id = mobj.group('id')

        info = self._download_json(
            'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
            article_id)

        info_dict = self._extract_video(info['video'], article_id)
        # The widget context carries the title used for the result,
        # replacing whatever the video object itself provided.
        info_dict['title'] = info['context']['title']
        return info_dict
134 | ||
79998cd5 | 135 | |
1236ac6b S |
136 | class SVTPlayBaseIE(SVTBaseIE): |
137 | _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n' | |
138 | ||
139 | ||
class SVTPlayIE(SVTPlayBaseIE):
    """Extractor for single videos on svtplay.se / oppetarkiv.se and ``svt:<id>`` URLs."""

    IE_DESC = 'SVT Play and Öppet arkiv'
    _VALID_URL = r'''(?x)
                    (?:
                        svt:(?P<svt_id>[^/?#&]+)|
                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
                    )
                    '''
    _TESTS = [{
        'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
        'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
        'info_dict': {
            'id': '5996901',
            'ext': 'mp4',
            'title': 'Flygplan till Haile Selassie',
            'duration': 3527,
            'thumbnail': r're:^https?://.*[\.-]jpg$',
            'age_limit': 0,
            'subtitles': {
                'sv': [{
                    'ext': 'wsrt',
                }]
            },
        },
    }, {
        # geo restricted to Sweden
        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
        'only_matching': True,
    }, {
        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
        'only_matching': True,
    }, {
        'url': 'https://www.svtplay.se/kanaler/svt1',
        'only_matching': True,
    }, {
        'url': 'svt:1376446-003A',
        'only_matching': True,
    }, {
        'url': 'svt:14278044',
        'only_matching': True,
    }]

    def _adjust_title(self, info):
        """Rewrite info['title'] through _live_title for live streams, in place."""
        if info['is_live']:
            info['title'] = self._live_title(info['title'])

    def _extract_by_video_id(self, video_id, webpage=None):
        """Extract a video through the videoplayer API.

        video_id is the SVT video id sent to the API. webpage, when given,
        is only used as a source for a fallback title.

        The title is taken, in order of preference, from the API data, the
        episode or series name, the page's og:title (with a trailing
        ``| ...`` suffix removed) and finally the video id itself.
        """
        data = self._download_json(
            'https://api.svt.se/videoplayer-api/video/%s' % video_id,
            video_id, headers=self.geo_verification_headers())
        info_dict = self._extract_video(data, video_id)
        if not info_dict.get('title'):
            title = dict_get(info_dict, ('episode', 'series'))
            if not title and webpage:
                # default=None keeps a page without og:title from aborting
                # the extraction, so the video id fallback below can apply.
                og_title = self._og_search_title(webpage, default=None)
                if og_title:
                    title = re.sub(r'\s*\|\s*.+?$', '', og_title)
            if not title:
                title = video_id
            info_dict['title'] = title
        self._adjust_title(info_dict)
        return info_dict

    def _real_extract(self, url):
        """Extract from an ``svt:`` id directly, or from a video page.

        For page URLs the JSON embedded in the page is tried first; when it
        holds no usable video object, the id found in the page's <video>
        tag is resolved through the videoplayer API instead.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id, svt_id = mobj.group('id', 'svt_id')

        if svt_id:
            return self._extract_by_video_id(svt_id)

        webpage = self._download_webpage(url, video_id)

        data = self._parse_json(
            self._search_regex(
                self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
                group='json'),
            video_id, fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)

        if data:
            video_info = try_get(
                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
                dict)
            if video_info:
                info_dict = self._extract_video(video_info, video_id)
                info_dict.update({
                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
                    'thumbnail': thumbnail,
                })
                self._adjust_title(info_dict)
                return info_dict

        svt_id = self._search_regex(
            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
            webpage, 'video id')

        info_dict = self._extract_by_video_id(svt_id, webpage)
        # Carry the page thumbnail over on this path as well, matching the
        # embedded-data path above.
        info_dict['thumbnail'] = thumbnail
        return info_dict
fd97fa7b MW |
237 | |
238 | ||
class SVTSeriesIE(SVTPlayBaseIE):
    """Playlist extractor for svtplay.se series pages, optionally one season."""

    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.svtplay.se/rederiet',
        'info_dict': {
            'id': 'rederiet',
            'title': 'Rederiet',
            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
        },
        'playlist_mincount': 318,
    }, {
        'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
        'info_dict': {
            'id': 'rederiet-sasong2',
            'title': 'Rederiet - Säsong 2',
            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
        },
        'playlist_count': 12,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that SVTIE or SVTPlayIE already claim."""
        if SVTIE.suitable(url) or SVTPlayIE.suitable(url):
            return False
        return super(SVTSeriesIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect the series' videos as url_result entries for SVTPlayIE.

        A ``tab=<slug>`` query parameter restricts the playlist to the
        season with that slug and is appended to the playlist id.
        """
        series_id = self._match_id(url)

        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        season_slug = query.get('tab', [None])[0]
        if season_slug:
            series_id = '%s-%s' % (series_id, season_slug)

        webpage = self._download_webpage(
            url, series_id, 'Downloading series page')

        embedded_json = self._search_regex(
            self._SVTPLAY_RE, webpage, 'content', group='json')
        root = self._parse_json(embedded_json, series_id)

        season_name = None
        entries = []
        accordion = root['relatedVideoContent']['relatedVideosAccordion']
        for season in accordion:
            if not isinstance(season, dict):
                continue
            # With a requested season, every other season is ignored
            if season_slug and season.get('slug') != season_slug:
                continue
            if season_slug:
                season_name = season.get('name')
            season_videos = season.get('videos')
            if not isinstance(season_videos, list):
                continue
            for item in season_videos:
                content_url = item.get('contentUrl')
                if content_url and isinstance(content_url, compat_str):
                    entry = self.url_result(
                        urljoin(url, content_url),
                        ie=SVTPlayIE.ie_key(),
                        video_title=item.get('title'))
                    entries.append(entry)

        metadata = root.get('metaData')
        if not isinstance(metadata, dict):
            metadata = {}

        title = metadata.get('title')
        # Fall back to the raw slug when the season carried no name
        season_name = season_name or season_slug

        if title and season_name:
            title = '%s - %s' % (title, season_name)
        elif season_slug:
            title = season_slug

        description = metadata.get('description')
        return self.playlist_result(entries, series_id, title, description)
7b393f9c S |
318 | |
319 | ||
320 | class SVTPageIE(InfoExtractor): | |
19a107f2 | 321 | _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)' |
7b393f9c | 322 | _TESTS = [{ |
19a107f2 | 323 | 'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill', |
7b393f9c | 324 | 'info_dict': { |
19a107f2 AG |
325 | 'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill', |
326 | 'title': 'GUIDE: Sommarträning du kan göra var och när du vill', | |
7b393f9c | 327 | }, |
19a107f2 | 328 | 'playlist_count': 7, |
7b393f9c | 329 | }, { |
19a107f2 | 330 | 'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner', |
7b393f9c | 331 | 'info_dict': { |
19a107f2 AG |
332 | 'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner', |
333 | 'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”', | |
7b393f9c | 334 | }, |
19a107f2 | 335 | 'playlist_count': 1, |
7b393f9c S |
336 | }, { |
337 | # only programTitle | |
338 | 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun', | |
339 | 'info_dict': { | |
19a107f2 | 340 | 'id': '2900353', |
7b393f9c S |
341 | 'ext': 'mp4', |
342 | 'title': 'Stjärnorna skojar till det - under SVT-intervjun', | |
343 | 'duration': 27, | |
344 | 'age_limit': 0, | |
345 | }, | |
346 | }, { | |
347 | 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1', | |
348 | 'only_matching': True, | |
349 | }, { | |
350 | 'url': 'https://www.svt.se/vader/manadskronikor/maj2018', | |
351 | 'only_matching': True, | |
352 | }] | |
353 | ||
354 | @classmethod | |
355 | def suitable(cls, url): | |
356 | return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url) | |
357 | ||
358 | def _real_extract(self, url): | |
19a107f2 | 359 | playlist_id = self._match_id(url) |
7b393f9c | 360 | |
19a107f2 | 361 | webpage = self._download_webpage(url, playlist_id) |
7b393f9c | 362 | |
19a107f2 AG |
363 | entries = [ |
364 | self.url_result( | |
365 | 'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id) | |
366 | for video_id in orderedSet(re.findall( | |
367 | r'data-video-id=["\'](\d+)', webpage))] | |
7b393f9c | 368 | |
19a107f2 | 369 | title = strip_or_none(self._og_search_title(webpage, default=None)) |
7b393f9c | 370 | |
19a107f2 | 371 | return self.playlist_result(entries, playlist_id, title) |