]>
Commit | Line | Data |
---|---|---|
0a8a7e68 TI |
1 | import json |
2 | import re | |
3 | ||
22354455 HL |
4 | from .common import InfoExtractor |
5 | from ..compat import compat_urllib_parse_urlparse | |
9414338a S |
6 | from ..utils import ( |
7 | determine_ext, | |
db75f14d | 8 | ExtractorError, |
29f7c58a | 9 | find_xpath_attr, |
9414338a | 10 | int_or_none, |
0a8a7e68 TI |
11 | traverse_obj, |
12 | try_call, | |
29f7c58a | 13 | unified_strdate, |
14 | url_or_none, | |
22889ab1 | 15 | xpath_attr, |
9414338a S |
16 | xpath_text, |
17 | ) | |
22354455 HL |
18 | |
19 | ||
class RuutuIE(InfoExtractor):
    """Extractor for ruutu.fi / supla.fi media and the Nelonen Media embed player.

    The numeric media id is taken from the URL, the media description XML is
    fetched from ``_API_BASE`` and the ``<Clip>`` element of that document is
    walked to collect the available formats.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/|
                            static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid=
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [
        {
            'url': 'http://www.ruutu.fi/video/2058907',
            'md5': 'ab2093f39be1ca8581963451b3c0234f',
            'info_dict': {
                'id': '2058907',
                'ext': 'mp4',
                'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
                'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 114,
                'age_limit': 0,
                'upload_date': '20150508',
            },
        },
        {
            'url': 'http://www.ruutu.fi/video/2057306',
            'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
            'info_dict': {
                'id': '2057306',
                'ext': 'mp4',
                'title': 'Superpesis: katso koko kausi Ruudussa',
                'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 40,
                'age_limit': 0,
                'upload_date': '20150507',
                'series': 'Superpesis',
                'categories': ['Urheilu'],
            },
        },
        {
            'url': 'http://www.supla.fi/supla/2231370',
            'md5': 'df14e782d49a2c0df03d3be2a54ef949',
            'info_dict': {
                'id': '2231370',
                'ext': 'mp4',
                'title': 'Osa 1: Mikael Jungner',
                'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
                'thumbnail': r're:^https?://.*\.jpg$',
                'age_limit': 0,
                'upload_date': '20151012',
                'series': 'Läpivalaisu',
            },
        },
        # Episode where <SourceFile> is "NOT-USED", but has other
        # downloadable sources available.
        {
            'url': 'http://www.ruutu.fi/video/3193728',
            'only_matching': True,
        },
        {
            # audio podcast
            'url': 'https://www.supla.fi/supla/3382410',
            'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
            'info_dict': {
                'id': '3382410',
                'ext': 'mp3',
                'title': 'Mikä ihmeen poltergeist?',
                'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
                'thumbnail': r're:^https?://.*\.jpg$',
                'age_limit': 0,
                'upload_date': '20190320',
                'series': 'Mysteeritarinat',
                'duration': 1324,
            },
            'expected_warnings': [
                'HTTP Error 502: Bad Gateway',
                'Failed to download m3u8 information',
            ],
        },
        {
            'url': 'http://www.supla.fi/audio/2231370',
            'only_matching': True,
        },
        {
            'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
            'only_matching': True,
        },
        {
            # episode
            'url': 'https://www.ruutu.fi/video/3401964',
            'info_dict': {
                'id': '3401964',
                'ext': 'mp4',
                'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
                'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 2582,
                'age_limit': 12,
                'upload_date': '20190508',
                'series': 'Temptation Island Suomi',
                'season_number': 5,
                'episode_number': 17,
                'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # premium
            'url': 'https://www.ruutu.fi/video/3618715',
            'only_matching': True,
        },
    ]
    # Base URL of the service that serves both the media XML and the
    # stream authentication endpoint used below.
    _API_BASE = 'https://gatling.nelonenmedia.fi'

    @classmethod
    def _extract_urls(cls, webpage):
        """Find embedded Ruutu videos in the HTML of a third-party page.

        Two embedding styles are recognised: a ``Drupal.settings`` JSON blob
        and a ``__NEXT_DATA__`` script tag. Returns a list of
        ``http://www.ruutu.fi/video/<id>`` URLs, or ``None`` when nothing
        was found.
        """
        # nelonen.fi
        settings = try_call(
            lambda: json.loads(re.search(
                r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False))
        if settings:
            video_id = traverse_obj(settings, (
                'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value'))
            if video_id:
                return [f'http://www.ruutu.fi/video/{video_id}']
        # hs.fi and is.fi
        settings = try_call(
            lambda: json.loads(re.search(
                '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
                webpage).group(1), strict=False))
        if settings:
            # A set drops ids that are referenced more than once in the body.
            video_ids = set(traverse_obj(settings, (
                'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or [])
            if video_ids:
                return [f'http://www.ruutu.fi/video/{v}' for v in video_ids]
            video_id = traverse_obj(settings, (
                'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId'))
            if video_id:
                return [f'http://www.ruutu.fi/video/{video_id}']
        return None

    def _real_extract(self, url):
        """Download the media XML for ``url`` and build the info dict.

        Raises ``ExtractorError`` (expected) when no format could be
        collected and the clip is not flagged as ``free``; DRM-protected
        clips are reported through ``report_drm`` unless
        ``allow_unplayable_formats`` is set.
        """
        video_id = self._match_id(url)

        video_xml = self._download_xml(
            f'{self._API_BASE}/media-xml-cache', video_id,
            query={'id': video_id})

        formats = []
        # Only used for membership tests, so a set is sufficient.
        processed_urls = set()

        def extract_formats(node):
            """Recursively collect formats from ``*Files`` / ``*File`` elements."""
            for child in node:
                if child.tag.endswith('Files'):
                    extract_formats(child)
                elif child.tag.endswith('File'):
                    video_url = child.text
                    if (not video_url or video_url in processed_urls
                            or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
                        continue
                    processed_urls.add(video_url)
                    # The extension is taken from the original URL, before it
                    # is possibly swapped for the authenticated one.
                    ext = determine_ext(video_url)
                    auth_video_url = url_or_none(self._download_webpage(
                        f'{self._API_BASE}/auth/access/v2', video_id,
                        note='Downloading authenticated %s stream URL' % ext,
                        fatal=False, query={'stream': video_url}))
                    if auth_video_url:
                        processed_urls.add(auth_video_url)
                        video_url = auth_video_url
                    if ext == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4',
                            entry_protocol='m3u8_native', m3u8_id='hls',
                            fatal=False))
                    elif ext == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            video_url, video_id, f4m_id='hds', fatal=False))
                    elif ext == 'mpd':
                        # video-only and audio-only streams are of different
                        # duration resulting in out of sync issue, so DASH
                        # manifests are deliberately skipped
                        continue
                    elif ext == 'mp3' or child.tag == 'AudioMediaFile':
                        formats.append({
                            'format_id': 'audio',
                            'url': video_url,
                            'vcodec': 'none',
                        })
                    else:
                        proto = compat_urllib_parse_urlparse(video_url).scheme
                        if not child.tag.startswith('HTTP') and proto != 'rtmp':
                            continue
                        preference = -1 if proto == 'rtmp' else 1
                        label = child.get('label')
                        tbr = int_or_none(child.get('bitrate'))
                        format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
                        if not self._is_valid_url(video_url, video_id, format_id):
                            continue
                        # Pad to two entries so that a missing, empty or
                        # 'x'-less resolution attribute yields None values
                        # instead of failing to unpack.
                        dimensions = (child.get('resolution') or '').split('x')
                        width, height = (
                            int_or_none(x) for x in (dimensions + [None, None])[:2])
                        formats.append({
                            'format_id': format_id,
                            'url': video_url,
                            'width': width,
                            'height': height,
                            'tbr': tbr,
                            'preference': preference,
                        })

        # Element.find returns None when there is no <Clip>; in that case
        # fall through to the "no formats" handling below.
        clip = video_xml.find('./Clip')
        if clip is not None:
            extract_formats(clip)

        def pv(name):
            """Return the passthrough variable ``name``, or None if absent, empty or 'NA'."""
            value = try_call(lambda: find_xpath_attr(
                video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value'))
            if value != 'NA':
                return value or None
            return None

        if not formats:
            if (not self.get_param('allow_unplayable_formats')
                    and xpath_text(video_xml, './Clip/DRM', default=None)):
                self.report_drm(video_id)
            ns_st_cds = pv('ns_st_cds')
            if ns_st_cds != 'free':
                # Avoid the literal "None" in the message when the variable
                # is missing from the XML.
                raise ExtractorError(
                    'This video is %s.' % (ns_st_cds or 'not available'), expected=True)

        self._sort_formats(formats)

        themes = pv('themes')

        return {
            'id': video_id,
            'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
            'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
            'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
            'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')),
            'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
            'upload_date': unified_strdate(pv('date_start')),
            'series': pv('series_name'),
            'season_number': int_or_none(pv('season_number')),
            'episode_number': int_or_none(pv('episode_number')),
            'categories': themes.split(',') if themes else None,
            'formats': formats,
        }