]>
Commit | Line | Data |
---|---|---|
28f12728 | 1 | # coding: utf-8 |
1309b396 PH |
2 | from __future__ import unicode_literals |
3 | ||
df5ae3eb S |
4 | import re |
5 | ||
1309b396 PH |
6 | from .common import InfoExtractor |
7 | from ..utils import ( | |
8 | determine_ext, | |
e4f90ea0 | 9 | dict_get, |
23bdae09 S |
10 | int_or_none, |
11 | try_get, | |
1309b396 PH |
12 | ) |
13 | ||
14 | ||
class SVTBaseIE(InfoExtractor):
    """Common base for the SVT extractors: turns SVT video metadata into an info dict."""

    # Countries passed along when reporting a geo restriction.
    _GEO_COUNTRIES = ['SE']

    def _extract_video(self, video_info, video_id):
        """Build an info dict from an SVT video description.

        video_info is the decoded JSON object describing one video. The keys
        read here are 'videoReferences' (required), 'rights', 'subtitles' or
        'subtitleReferences', 'title', 'programTitle', 'season',
        'episodeTitle', 'episodeNumber', 'materialLength' or
        'contentDuration', and 'inappropriateForChildren' or
        'blockedForChildren'.

        video_id is used as the 'id' of the result and for progress messages.

        Returns a dict with 'id', 'title', 'formats', 'subtitles',
        'duration', 'age_limit', 'series', 'season_number', 'episode' and
        'episode_number'.
        """
        formats = []
        for vr in video_info['videoReferences']:
            vurl = vr.get('url')
            if not vurl:
                # A reference without a URL cannot be downloaded; skip it
                # instead of aborting the whole extraction.
                continue
            player_type = vr.get('playerType') or vr.get('format')
            ext = determine_ext(vurl)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    vurl, video_id,
                    ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id=player_type, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    vurl + '?hdcore=3.3.0', video_id,
                    f4m_id=player_type, fatal=False))
            elif ext == 'mpd':
                # Only the 'dashhbbtv' DASH manifest is extracted; other
                # DASH references are ignored.
                if player_type == 'dashhbbtv':
                    formats.extend(self._extract_mpd_formats(
                        vurl, video_id, mpd_id=player_type, fatal=False))
            else:
                formats.append({
                    'format_id': player_type,
                    'url': vurl,
                })

        # 'rights' may be absent or null; only a real dict can carry the flag.
        rights = video_info.get('rights')
        if not formats and isinstance(rights, dict) and rights.get('geoBlockedSweden'):
            self.raise_geo_restricted(
                'This video is only available in Sweden',
                countries=self._GEO_COUNTRIES)
        self._sort_formats(formats)

        subtitles = {}
        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
        if isinstance(subtitle_references, list):
            for sr in subtitle_references:
                subtitle_url = sr.get('url')
                if not subtitle_url:
                    continue
                if determine_ext(subtitle_url) == 'm3u8':
                    # TODO(yan12125): handle WebVTT in m3u8 manifests
                    continue
                # Fall back to Swedish when the language is missing *or* null,
                # so that None never becomes a subtitles key.
                subtitle_lang = sr.get('language') or 'sv'
                subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})

        # skip_false_values=False so that an explicit False is kept and
        # mapped to age limit 0 rather than treated as "unknown".
        adult = dict_get(
            video_info, ('inappropriateForChildren', 'blockedForChildren'),
            skip_false_values=False)
        age_limit = None
        if adult is not None:
            age_limit = 18 if adult else 0

        return {
            'id': video_id,
            'title': video_info.get('title'),
            'formats': formats,
            'subtitles': subtitles,
            'duration': int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))),
            'age_limit': age_limit,
            'series': video_info.get('programTitle'),
            'season_number': int_or_none(video_info.get('season')),
            'episode': video_info.get('episodeTitle'),
            'episode_number': int_or_none(video_info.get('episodeNumber')),
        }
79998cd5 S |
88 | |
89 | ||
class SVTIE(SVTBaseIE):
    """Extractor for svt.se widget embeds addressed by widgetId and articleId."""

    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
    _TEST = {
        # The query separator after widgetId must be a literal '&';
        # _VALID_URL requires it directly after the widget id digits.
        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
        'info_dict': {
            'id': '2900353',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first SVT widget URL found in an iframe src or href of webpage, or None."""
        mobj = re.search(
            r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the widget's JSON description and return the info dict for its video."""
        mobj = re.match(self._VALID_URL, url)
        widget_id = mobj.group('widget_id')
        article_id = mobj.group('id')

        info = self._download_json(
            'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
            article_id)

        info_dict = self._extract_video(info['video'], article_id)
        # The title comes from the surrounding article context and replaces
        # whatever title the video object itself carried.
        info_dict['title'] = info['context']['title']
        return info_dict
123 | ||
79998cd5 S |
124 | |
class SVTPlayIE(SVTBaseIE):
    """Extractor for video and clip pages on svtplay.se and oppetarkiv.se."""

    IE_DESC = 'SVT Play and Öppet arkiv'
    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
        'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
        'info_dict': {
            'id': '5996901',
            'ext': 'mp4',
            'title': 'Flygplan till Haile Selassie',
            'duration': 3527,
            'thumbnail': r're:^https?://.*[\.-]jpg$',
            'age_limit': 0,
            'subtitles': {
                'sv': [{
                    'ext': 'wsrt',
                }]
            },
        },
    }, {
        # geo restricted to Sweden
        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
        'only_matching': True,
    }, {
        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a video from an SVT Play / Öppet arkiv page.

        The page's embedded ``root["__svtplay"]`` JSON is tried first. If it
        holds no video object, the <video data-video-id> attribute is looked
        up and the videoplayer API is queried instead. Returns the info dict,
        or None when neither source yields a video.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Missing or unparsable embedded data is not fatal; the fallback
        # below still gets a chance.
        data = self._parse_json(
            self._search_regex(
                r'root\["__svtplay"\]\s*=\s*([^;]+);',
                webpage, 'embedded data', default='{}'),
            video_id, fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)

        if data:
            video_info = try_get(
                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
                dict)
            if video_info:
                info_dict = self._extract_video(video_info, video_id)
                # Prefer the page-level title, but keep the title taken from
                # the video object when the MetaStore entry is absent rather
                # than failing with a KeyError.
                meta_title = try_get(
                    data, lambda x: x['context']['dispatcher']['stores']['MetaStore']['title'])
                if meta_title:
                    info_dict['title'] = meta_title
                info_dict['thumbnail'] = thumbnail
                return info_dict

        # Fallback: the player element carries the id used by the
        # videoplayer API, which also becomes the id of the result.
        svt_id = self._search_regex(
            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
            webpage, 'video id', default=None)

        if svt_id:
            api_data = self._download_json(
                'http://www.svt.se/videoplayer-api/video/%s' % svt_id, svt_id)
            info_dict = self._extract_video(api_data, svt_id)
            if not info_dict.get('title'):
                # Strip a trailing " | <site name>" style suffix.
                info_dict['title'] = re.sub(
                    r'\s*\|\s*.+?$', '',
                    info_dict.get('episode') or self._og_search_title(webpage))
            return info_dict
        # No player element found: falls through and returns None.