]>
Commit | Line | Data |
---|---|---|
89284910 JS |
1 | # coding: utf-8 |
2 | ||
54543467 JMF |
3 | import re |
4 | import xml.etree.ElementTree | |
5 | import json | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | compat_urlparse, | |
10 | ExtractorError, | |
11 | find_xpath_attr, | |
12 | ) | |
13 | ||
class ORFIE(InfoExtractor):
    """Extractor for episode and topic pages on tvthek.orf.at.

    A page is treated as a playlist: every item found in the flash XML
    configuration embedded in the page is paired with the matching entry
    of the JSON playlist embedded in the same page, and one video info
    dict is produced per pair.
    """

    # The dots of the host name are escaped so that only the literal host
    # "tvthek.orf.at" is accepted (an unescaped "." matches any character).
    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a list of video info dicts for the page at *url*.

        Each dict has the keys ``_type``, ``id``, ``title``, ``url``,
        ``ext`` and ``description``.

        Raises ExtractorError when no usable video URL is found for an
        item in any of the known qualities.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        # The flash configuration is embedded as a quoted string; it is run
        # through parse_qs to undo its query-string style escaping before
        # being parsed as XML.
        flash_xml = self._search_regex(
            r"ORF\.flashXML = '(.+?)'", webpage, u'flash xml')
        flash_xml = compat_urlparse.parse_qs('xml=' + flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(
            flash_xml.encode('utf-8'))

        # The playlist is a JSON array embedded in a single-quoted string
        # with its double quotes backslash-escaped.
        playlist_json = self._search_regex(
            r'playlist\': \'(\[.*?\])\'', webpage, u'playlist')
        playlist = json.loads(playlist_json.replace(r'\"', '"'))

        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        # Page-wide description, used when an entry has none of its own.
        webpage_description = self._og_search_description(webpage)

        videos = []
        # NOTE(review): zip() stops at the shorter sequence, so XML items
        # and JSON entries are assumed to come in the same order and
        # number -- confirm against the site.
        paired = zip(flash_config.findall(xpath), playlist)
        for i, (item, info) in enumerate(paired, 1):
            # Get best quality url: qualities are tried in order of
            # preference and the first one carrying a URL wins.
            rtmp_url = None
            for quality in ('Q6A', 'Q4A', 'Q1A'):
                video_url = find_xpath_attr(
                    item, '%sVideoUrl' % ns, 'quality', quality)
                # An element without text is useless as a URL, so keep
                # looking at the remaining qualities in that case.
                if video_url is not None and video_url.text:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(
                    u'Couldn\'t get video url: %s' % info['id'])

            # Playlist entries are numbered from 1 in the page markup.
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
                u'description', default=webpage_description,
                flags=re.DOTALL)
            videos.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
            })

        return videos