]>
Commit | Line | Data |
---|---|---|
54543467 JMF |
1 | import re |
2 | import xml.etree.ElementTree | |
3 | import json | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_urlparse, | |
8 | ExtractorError, | |
9 | find_xpath_attr, | |
10 | ) | |
11 | ||
class ORFIE(InfoExtractor):
    """Extractor for episode and topic pages on tvthek.orf.at.

    A page embeds two pieces of data that are combined here: a URL-encoded
    XML document assigned to ``ORF.flashXML`` (holding the stream URLs per
    quality level) and a JSON ``playlist`` array (holding the id and title
    of every segment).  The two sequences are paired positionally.
    """

    # The dots of the host name are escaped so that only the literal domain
    # matches; an unescaped '.' would accept any character in its place.
    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
        u'file': u'6566957.flv',
        u'info_dict': {
            u'title': u'Wetter',
            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
        },
        u'params': {
            # It uses rtmp
            u'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Build one video entry per playlist item found on the page at *url*.

        Returns a list of dicts, each with the keys ``_type``, ``id``,
        ``title``, ``url``, ``ext`` and ``description``.

        Raises ExtractorError when an item yields no URL for any of the
        known quality levels.  The helper methods used to download and
        search the page presumably raise on failure as well -- confirm
        against InfoExtractor.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        # The XML is embedded URL-encoded; wrapping it as a fake query string
        # lets parse_qs do the decoding.
        flash_xml = self._search_regex(
            r'ORF\.flashXML = \'(.+?)\'', webpage, u'flash xml')
        flash_xml = compat_urlparse.parse_qs('xml=' + flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))

        # The playlist is a JSON array stored inside a single-quoted string
        # with its double quotes backslash-escaped; undo that before parsing.
        playlist_json = self._search_regex(
            r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"', '"')
        playlist = json.loads(playlist_json)

        videos = []
        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        # Used as the fallback when an entry has no description of its own.
        webpage_description = self._og_search_description(webpage)
        # The i-th XML item is paired with the i-th playlist entry; the
        # counter starts at 1 to match the "playlist_entry_<n>" ids searched
        # for below.
        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
            # Get best quality url: qualities are tried in this order and the
            # first one present wins.
            rtmp_url = None
            for q in ['Q6A', 'Q4A', 'Q1A']:
                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
                if video_url is not None:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
                u'description', default=webpage_description, flags=re.DOTALL)
            videos.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
            })

        return videos