]>
Commit | Line | Data |
---|---|---|
1683735e TI |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
1683735e | 4 | import re |
1683735e TI |
5 | |
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | base_url, | |
9 | ExtractorError, | |
10 | try_get, | |
11 | ) | |
12 | from ..compat import compat_str | |
1683735e TI |
13 | |
14 | ||
class ElonetIE(InfoExtractor):
    """Extractor for film records hosted on elonet.finna.fi.

    A record page embeds its stream manifest in a ``data-video-sources``
    HTML attribute holding a JSON list; the first entry's ``src`` is either
    an HLS (``.m3u8``) or a DASH (``.mpd``) manifest URL.
    """

    _VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
    _TESTS = [{
        # m3u8 with subtitles
        'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
        'md5': '8efc954b96c543711707f87de757caea',
        'info_dict': {
            'id': '107867',
            'ext': 'mp4',
            'title': 'Valkoinen peura',
            'description': 'Valkoinen peura (1952) on Erik Blombergin ohjaama ja yhdessä Mirjami Kuosmasen kanssa käsikirjoittama tarunomainen kertomus valkoisen peuran hahmossa lii...',
            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_107867&index=0&size=large',
        },
    }, {
        # DASH with subtitles
        'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_116539',
        'info_dict': {
            'id': '116539',
            'ext': 'mp4',
            'title': 'Minulla on tiikeri',
            'description': 'Pienellä pojalla, joka asuu kerrostalossa, on kotieläimenä tiikeri. Se on kuitenkin salaisuus. Kerrostalon räpätäti on Kotilaisen täti, joka on aina vali...',
            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_116539&index=0&size=large&source=Solr',
        }
    }]

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for one record page.

        Returns an info dict with the keys ``id``, ``title``,
        ``description``, ``thumbnail``, ``formats`` and ``subtitles``.

        Raises ExtractorError when the page carries no usable video source
        URL, or when that URL is neither an m3u8 nor an mpd manifest.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The title is mandatory; description and thumbnail are optional
        # metadata, so a page lacking them must not abort the extraction.
        title = self._html_search_regex(
            r'<meta .*property="og:title" .*content="(.+?)"', webpage, 'title')
        description = self._html_search_regex(
            r'<meta .*property="og:description" .*content="(.+?)"', webpage,
            'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta .*property="og:image" .*content="(.+?)"', webpage,
            'thumbnail', fatal=False)

        json_s = self._html_search_regex(
            r'data-video-sources="(.+?)"', webpage, 'json')
        src = try_get(
            self._parse_json(json_s, video_id),
            lambda x: x[0]['src'], compat_str)
        if not src:
            # try_get yields None for an empty list, a missing key or a
            # non-string value; fail with a readable extractor error rather
            # than a TypeError from re.search(…, None) below.
            raise ExtractorError('Unable to find video source URL')

        formats = []
        subtitles = {}
        if re.search(r'\.m3u8\??', src):
            res = self._download_webpage_handle(
                # elonet servers have certificate problems
                src.replace('https:', 'http:'), video_id,
                note='Downloading m3u8 information',
                errnote='Failed to download m3u8 information')
            if res:
                doc, urlh = res
                # Use the final (post-redirect) URL as the playlist base.
                m3u8_url = urlh.geturl()
                formats, subtitles = self._parse_m3u8_formats_and_subtitles(doc, m3u8_url)
                # Report every HLS format with the mp4 extension.
                for f in formats:
                    f['ext'] = 'mp4'
        elif re.search(r'\.mpd\??', src):
            res = self._download_xml_handle(
                src, video_id,
                note='Downloading MPD manifest',
                errnote='Failed to download MPD manifest')
            if res:
                doc, urlh = res
                mpd_base_url = base_url(urlh.geturl())
                formats, subtitles = self._parse_mpd_formats_and_subtitles(
                    doc, mpd_base_url=mpd_base_url)
        else:
            raise ExtractorError("Unknown streaming format")

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }