# coding: utf-8
from __future__ import unicode_literals

import io
import os
import re
import tempfile

from .common import InfoExtractor
from ..compat import compat_str
from ..downloader.hls import HlsFD
from ..utils import (
    ExtractorError,
    base_url,
    try_get,
)
class ElonetIE(InfoExtractor):
    """Extractor for film records hosted on elonet.finna.fi.

    The record page embeds a JSON list of video sources; the first source is
    either an HLS (``.m3u8``) or a DASH (``.mpd``) manifest. Formats are parsed
    from that manifest, and VTT subtitles are collected from it as well.
    """

    _VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
        'md5': '8efc954b96c543711707f87de757caea',
        'info_dict': {
            'id': '107867',
            'ext': 'mp4',
            'title': 'Valkoinen peura',
            'description': 'Valkoinen peura (1952) on Erik Blombergin ohjaama ja yhdessä Mirjami Kuosmasen kanssa käsikirjoittama tarunomainen kertomus valkoisen peuran hahmossa lii...',
            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_107867&index=0&size=large',
        },
    }

    def _download_m3u8_chunked_subtitle(self, chunklist_url):
        """
        Download VTT subtitles from pieces in manifest URL.

        The chunk list is fetched with ``HlsFD`` into a temporary file, which
        is read back and always removed afterwards.

        Return a string containing joined chunks with extra headers removed
        and a single ``WEBVTT`` header prepended.
        """
        # Only the unique path is wanted here: the file itself is deleted as
        # soon as the context exits.
        # NOTE(review): this assumes HlsFD creates the target file on its own
        # and must not find an existing one - confirm against the downloader.
        with tempfile.NamedTemporaryFile(delete=True) as outfile:
            fname = outfile.name
        try:
            hlsdl = HlsFD(self._downloader, {})
            hlsdl.download(compat_str(fname), {"url": chunklist_url})
            # Decode explicitly so the result does not depend on the locale
            # default encoding.
            with io.open(fname, 'r', encoding='utf-8') as fin:
                # Remove (some) headers
                fdata = re.sub(
                    r'X-TIMESTAMP-MAP.*\n+|WEBVTT\n+', '', fin.read())
        finally:
            # Clean up even when the download or the read failed.
            if os.path.exists(fname):
                os.remove(fname)
        return "WEBVTT\n\n" + fdata

    def _parse_m3u8_subtitles(self, m3u8_doc, m3u8_url):
        """
        Parse subtitles from HLS / m3u8 manifest.

        Return a dict mapping each language to a one-element list holding the
        downloaded VTT text under ``'data'``. Subtitle entries that lack a
        ``LANGUAGE`` or ``URI`` attribute are skipped.
        """
        subtitles = {}
        # Subtitle URIs are resolved against the manifest's directory.
        baseurl = m3u8_url[:m3u8_url.rindex('/') + 1]
        for line in m3u8_doc.split('\n'):
            if 'EXT-X-MEDIA:TYPE=SUBTITLES' not in line:
                continue
            lang = self._search_regex(
                r'LANGUAGE="(.+?)"', line, 'lang', default=False)
            uri = self._search_regex(
                r'URI="(.+?)"', line, 'uri', default=False)
            # Both default to False when absent; concatenating False to the
            # base URL would raise TypeError.
            if not (lang and uri):
                continue
            data = self._download_m3u8_chunked_subtitle(baseurl + uri)
            subtitles[lang] = [{'ext': 'vtt', 'data': data}]
        return subtitles

    def _parse_mpd_subtitles(self, mpd_doc):
        """
        Parse subtitles from MPD manifest.

        Return a dict mapping each language (``'unk'`` when the adaptation set
        has no ``lang`` attribute) to a one-element list holding the subtitle
        URL under ``'url'``. Adaptation sets without a ``BaseURL`` are skipped.
        """
        ns = '{urn:mpeg:dash:schema:mpd:2011}'
        subtitles = {}
        for aset in mpd_doc.findall(
                ".//%sAdaptationSet[@mimeType='text/vtt']" % (ns)):
            lang = aset.attrib.get('lang', 'unk')
            base = aset.find("./%sRepresentation/%sBaseURL" % (ns, ns))
            # find() returns None when the element is missing.
            if base is None or not base.text:
                continue
            subtitles[lang] = [{'ext': 'vtt', 'url': base.text}]
        return subtitles

    def _get_subtitles(self, fmt, doc, url):
        """
        Collect subtitles for an already downloaded manifest.

        ``fmt`` is the manifest kind set by ``_real_extract`` ('m3u8' or
        'mpd'), ``doc`` the manifest content and ``url`` its URL. Any other
        ``fmt`` produces a warning and an empty dict.
        """
        if fmt == 'm3u8':
            return self._parse_m3u8_subtitles(doc, url)
        if fmt == 'mpd':
            return self._parse_mpd_subtitles(doc)
        self._downloader.report_warning(
            "Cannot download subtitles from '%s' streams." % (fmt))
        return {}

    def _real_extract(self, url):
        """
        Extract metadata, formats and subtitles for one record page.

        Raises ExtractorError when no video source is found on the page or
        when the source is neither an m3u8 nor an MPD manifest.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<meta .*property="og:title" .*content="(.+?)"', webpage, 'title')
        description = self._html_search_regex(
            r'<meta .*property="og:description" .*content="(.+?)"', webpage,
            'description')
        thumbnail = self._html_search_regex(
            r'<meta .*property="og:image" .*content="(.+?)"', webpage,
            'thumbnail')

        json_s = self._html_search_regex(
            r'data-video-sources="(.+?)"', webpage, 'json')
        src = try_get(
            self._parse_json(json_s, video_id),
            lambda x: x[0]["src"], compat_str)
        if not src:
            # Without this guard a missing source reaches re.search() as None.
            raise ExtractorError("Unable to find video source")

        if re.search(r'\.m3u8\??', src):
            fmt = 'm3u8'
            doc, urlh = self._download_webpage_handle(
                # elonet servers have certificate problems
                src.replace('https:', 'http:'), video_id,
                note='Downloading m3u8 information',
                errnote='Failed to download m3u8 information')
            manifest_url = urlh.geturl()
            # NOTE(review): ext='mp4' matches the test expectation above;
            # confirm it is the container the HLS download produces.
            formats = self._parse_m3u8_formats(doc, manifest_url, ext='mp4')
        elif re.search(r'\.mpd\??', src):
            fmt = 'mpd'
            doc, urlh = self._download_xml_handle(
                src, video_id,
                note='Downloading MPD manifest',
                errnote='Failed to download MPD manifest')
            manifest_url = base_url(urlh.geturl())
            formats = self._parse_mpd_formats(doc, mpd_base_url=manifest_url)
        else:
            raise ExtractorError("Unknown streaming format")

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': self.extract_subtitles(fmt, doc, manifest_url),
        }