]>
Commit | Line | Data |
---|---|---|
89284910 | 1 | # coding: utf-8 |
5d73273f | 2 | from __future__ import unicode_literals |
89284910 | 3 | |
54543467 | 4 | import json |
5d73273f | 5 | import re |
54543467 JMF |
6 | |
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
5d73273f PH |
9 | HEADRequest, |
10 | unified_strdate, | |
54543467 JMF |
11 | ) |
12 | ||
5d73273f | 13 | |
class ORFIE(InfoExtractor):
    """Extractor for pages on tvthek.orf.at.

    A matching page is returned as a playlist containing one video entry
    for every segment listed in the JSON data embedded in the page.
    """

    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

    _TEST = {
        'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
        'file': '7319747.mp4',
        'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
        'info_dict': {
            'title': 'Was Sie schon immer über Klassik wissen wollten',
            'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
            'duration': 3508,
            'upload_date': '20140105',
        },
        'skip': 'Blocked outside of Austria',
    }

    def _real_extract(self, url):
        """Build a playlist from the segment data embedded in the page.

        The page at ``url`` is downloaded, the JSON argument of the
        ``initializeAdworx(...)`` call is parsed, and every item of
        ``[0]['values']['segments']`` becomes one video entry.

        Returns a dict with ``'_type': 'playlist'``, the numeric id taken
        from the URL as ``'id'`` and the list of video dicts as
        ``'entries'``.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        data_json = self._search_regex(
            r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
        all_data = json.loads(data_json)
        sdata = all_data[0]['values']['segments']

        def quality_to_int(s):
            # Use the first run of digits in the quality label as a numeric
            # rank; labels without any digit get -1.
            m = re.search(r'([0-9]+)', s)
            if m is None:
                return -1
            return int(m.group(1))

        entries = []
        for sd in sdata:
            video_id = sd['id']
            formats = [{
                'preference': -10 if fd['delivery'] == 'hls' else None,
                'format_id': '%s-%s-%s' % (
                    fd['delivery'], fd['quality'], fd['quality_string']),
                'url': fd['src'],
                'protocol': fd['protocol'],
                'quality': quality_to_int(fd['quality']),
            } for fd in sd['playlist_item_array']['sources']]

            # Check for geoblocking.
            # There is a property is_geoprotection, but that's always false
            geo_str = sd.get('geoprotection_string')
            if geo_str:
                # Probe the first plain HTTP(S) .mp4 format, if there is one.
                http_url = next(
                    (f['url'] for f in formats
                     if re.match(r'^https?://.*\.mp4$', f['url'])),
                    None)
                if http_url is not None:
                    # Only the outcome of the request matters, the response
                    # itself is not used.  fatal=False presumably turns a
                    # failure into a report of errnote instead of aborting
                    # the extraction -- confirm against _request_webpage.
                    self._request_webpage(
                        HEADRequest(http_url), video_id,
                        note='Testing for geoblocking',
                        errnote=((
                            'This video seems to be blocked outside of %s. '
                            'You may want to try the streaming-* formats.')
                            % geo_str),
                        fatal=False)

            self._sort_formats(formats)

            # Duration and upload date are treated like the other optional
            # metadata below (description, thumbnail): a missing or
            # malformed value yields None instead of failing the playlist.
            try:
                duration = int(sd.get('duration_in_seconds'))
            except (TypeError, ValueError):
                duration = None

            created_date = sd.get('created_date')
            upload_date = (
                unified_strdate(created_date) if created_date else None)

            entries.append({
                '_type': 'video',
                'id': video_id,
                'title': sd['header'],
                'formats': formats,
                'description': sd.get('description'),
                'duration': duration,
                'upload_date': upload_date,
                'thumbnail': sd.get('image_full_url'),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': playlist_id,
        }