]>
Commit | Line | Data |
---|---|---|
28f12728 | 1 | # coding: utf-8 |
1309b396 PH |
2 | from __future__ import unicode_literals |
3 | ||
df5ae3eb S |
4 | import re |
5 | ||
1309b396 PH |
6 | from .common import InfoExtractor |
7 | from ..utils import ( | |
8 | determine_ext, | |
e4f90ea0 | 9 | dict_get, |
23bdae09 S |
10 | int_or_none, |
11 | try_get, | |
1309b396 PH |
12 | ) |
13 | ||
14 | ||
class SVTBaseIE(InfoExtractor):
    """Shared base for the SVT extractors: converts SVT video metadata to an info dict."""

    def _extract_video(self, video_info, video_id):
        """Build an info dict from an SVT video metadata mapping.

        Args:
            video_info: Mapping of video metadata. ``videoReferences`` is
                required; ``rights``, ``subtitles``/``subtitleReferences``,
                ``title``, ``programTitle``, ``season``, ``episodeTitle``,
                ``episodeNumber``, ``materialLength``/``contentDuration`` and
                ``inappropriateForChildren``/``blockedForChildren`` are read
                when present.
            video_id: Identifier stored as ``id`` and passed to the manifest
                extraction helpers.

        Returns:
            A dict with the keys ``id``, ``title``, ``formats``,
            ``subtitles``, ``duration``, ``age_limit``, ``series``,
            ``season_number``, ``episode`` and ``episode_number``.

        Raises:
            KeyError: if ``video_info`` has no ``videoReferences`` entry.
        """
        formats = []
        for vr in video_info['videoReferences']:
            player_type = vr.get('playerType')
            vurl = vr.get('url')
            if not vurl:
                # A reference without a URL cannot yield a format; skip it
                # rather than aborting the whole extraction with a KeyError.
                continue
            ext = determine_ext(vurl)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    vurl, video_id,
                    ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id=player_type, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    vurl + '?hdcore=3.3.0', video_id,
                    f4m_id=player_type, fatal=False))
            elif ext == 'mpd':
                # Only the 'dashhbbtv' DASH manifest is extracted; any other
                # mpd reference is deliberately ignored.
                if player_type == 'dashhbbtv':
                    formats.extend(self._extract_mpd_formats(
                        vurl, video_id, mpd_id=player_type, fatal=False))
            else:
                # Unknown extension: expose the URL as a single direct format.
                formats.append({
                    'format_id': player_type,
                    'url': vurl,
                })

        # 'rights' may be present but null (or not a mapping at all), in
        # which case a plain .get('rights', {}) default would not help.
        rights = video_info.get('rights')
        if not isinstance(rights, dict):
            rights = {}
        if not formats and rights.get('geoBlockedSweden'):
            self.raise_geo_restricted('This video is only available in Sweden')
        self._sort_formats(formats)

        subtitles = {}
        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
        if isinstance(subtitle_references, list):
            for sr in subtitle_references:
                subtitle_url = sr.get('url')
                # Entries without an explicit language are filed under 'sv'.
                subtitle_lang = sr.get('language', 'sv')
                if not subtitle_url:
                    continue
                if determine_ext(subtitle_url) == 'm3u8':
                    # TODO(yan12125): handle WebVTT in m3u8 manifests
                    continue
                subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})

        title = video_info.get('title')

        series = video_info.get('programTitle')
        season_number = int_or_none(video_info.get('season'))
        episode = video_info.get('episodeTitle')
        episode_number = int_or_none(video_info.get('episodeNumber'))

        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))

        # age_limit stays None when neither flag is present; otherwise a
        # truthy flag maps to 18 and a falsy one to 0.
        # NOTE(review): skip_false_values=False presumably makes dict_get
        # return an explicit false flag instead of skipping it - confirm.
        age_limit = None
        adult = dict_get(
            video_info, ('inappropriateForChildren', 'blockedForChildren'),
            skip_false_values=False)
        if adult is not None:
            age_limit = 18 if adult else 0

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'duration': duration,
            'age_limit': age_limit,
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
        }
79998cd5 S |
84 | |
85 | ||
class SVTIE(SVTBaseIE):
    """Extractor for svt.se widget URLs (``/wd?widgetId=...&articleId=...``)."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _TEST = {
        # The query string must read '&sectionId=': a mis-decoded '&sect'
        # entity ('§ionId') would stop the URL from matching _VALID_URL,
        # which requires '&' directly after the widget id digits.
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
        'info_dict': {
            'id': '2900353',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first SVT widget URL embedded in ``webpage``, or None.

        The URL is looked for in an ``<iframe src="...">`` or ``href="..."``
        attribute and must match ``_VALID_URL``.
        """
        mobj = re.search(
            r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the widget JSON for ``url`` and return its info dict."""
        mobj = re.match(self._VALID_URL, url)
        widget_id = mobj.group('widget_id')
        article_id = mobj.group('id')

        info = self._download_json(
            'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
            article_id)

        info_dict = self._extract_video(info['video'], article_id)
        # The title comes from the surrounding context object and replaces
        # whatever _extract_video read from the video metadata itself.
        info_dict['title'] = info['context']['title']
        return info_dict
119 | ||
79998cd5 S |
120 | |
class SVTPlayIE(SVTBaseIE):
    """Extractor for svtplay.se and oppetarkiv.se ``/video/`` and ``/klipp/`` pages."""

    IE_DESC = 'SVT Play and Öppet arkiv'
    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
        'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
        'info_dict': {
            'id': '5996901',
            'ext': 'mp4',
            'title': 'Flygplan till Haile Selassie',
            'duration': 3527,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*[\.-]jpg$',
            'age_limit': 0,
            'subtitles': {
                'sv': [{
                    'ext': 'wsrt',
                }]
            },
        },
    }, {
        # geo restricted to Sweden
        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
        'only_matching': True,
    }, {
        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        The page's embedded ``root["__svtplay"]`` JSON is tried first. If it
        yields no video metadata, the ``data-video-id`` attribute of the
        page's ``<video>`` tag is used to query the videoplayer API instead.

        NOTE: when neither source is available the method falls off the end
        and returns None implicitly.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # default='{}' keeps a page without the embedded blob parseable.
        data = self._parse_json(
            self._search_regex(
                r'root\["__svtplay"\]\s*=\s*([^;]+);',
                webpage, 'embedded data', default='{}'),
            video_id, fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)

        if data:
            video_info = try_get(
                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
                dict)
            if video_info:
                info_dict = self._extract_video(video_info, video_id)
                info_dict.update({
                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
                    'thumbnail': thumbnail,
                })
                return info_dict

        # Fallback: from here on video_id is the id found in the <video>
        # tag, not the numeric id taken from the URL.
        video_id = self._search_regex(
            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
            webpage, 'video id', default=None)

        if video_id:
            data = self._download_json(
                'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
            info_dict = self._extract_video(data, video_id)
            if not info_dict.get('title'):
                # Use the episode title or the page's og:title, with any
                # trailing '| ...' suffix removed.
                info_dict['title'] = re.sub(
                    r'\s*\|\s*.+?$', '',
                    info_dict.get('episode') or self._og_search_title(webpage))
            return info_dict