]>
Commit | Line | Data |
---|---|---|
e0b4cc48 | 1 | from __future__ import unicode_literals |
73e79f2a PH |
2 | |
3 | import re | |
73e79f2a | 4 | |
b2727d0b S |
5 | from .common import InfoExtractor |
6 | from ..utils import ( | |
7 | int_or_none, | |
8 | unified_strdate, | |
9 | xpath_text, | |
10 | determine_ext, | |
b2727d0b S |
11 | float_or_none, |
12 | ExtractorError, | |
13 | ) | |
73e79f2a PH |
14 | |
15 | ||
class DreiSatIE(InfoExtractor):
    """Extractor for videos addressed by ``obj=<id>`` in the 3sat Mediathek.

    The numeric id from the page URL is used to query the
    ``beitragsDetails`` XML service; formats, thumbnails and metadata are
    read from that XML document.
    """

    IE_NAME = '3sat'
    _GEO_COUNTRIES = ['DE']
    _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
            'md5': 'be37228896d30a88f315b638900a026e',
            'info_dict': {
                'id': '45918',
                'ext': 'mp4',
                'title': 'Waidmannsheil',
                'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
                'uploader': 'SCHWEIZWEIT',
                'uploader_id': '100000210',
                'upload_date': '20140913'
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            }
        },
        {
            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
            'only_matching': True,
        },
    ]

    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Build a sorted list of format dicts from a parsed SMIL document.

        Every ``paramGroup`` element under ``./head`` is collected into a
        dict of its children's ``name``/``value`` attributes.  Each
        ``video`` element that has a ``src`` then yields one format per
        entry in the comma-separated ``protocols`` parameter of the group
        named by the video's ``paramGroup`` attribute.

        ``smil_url``, ``video_id``, ``f4m_params`` and ``transform_rtmp_url``
        are accepted but not used by this implementation.
        NOTE(review): the signature suggests this overrides a base-class
        hook called by ``_extract_smil_formats`` - confirm against
        ``InfoExtractor``.

        Videos whose parameter group is missing, or lacks ``protocols`` or
        ``host``, are skipped instead of raising ``KeyError``.
        """
        # Attribute name for the group id in the XML namespace
        # (presumably resolves to ``xml:id``).
        id_attr = self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace')

        param_groups = {}
        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
            param_groups[param_group.get(id_attr)] = dict(
                (param.get('name'), param.get('value'))
                for param in param_group)

        formats = []
        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                continue

            # Prefer the "_<digits>k" fragment of the source path; otherwise
            # fall back to the element's declared system bitrate.
            bitrate = int_or_none(self._search_regex(
                r'_(\d+)k', src, 'bitrate', None)) or float_or_none(
                video.get('system-bitrate') or video.get('systemBitrate'), 1000)

            param_group = param_groups.get(video.get('paramGroup'))
            if not param_group:
                # Unknown or missing group: nothing to build a URL from.
                continue
            protocols = param_group.get('protocols')
            host = param_group.get('host')
            if not protocols or not host:
                continue

            for proto in protocols.split(','):
                # '%d' cannot format None, so only append the bitrate to the
                # format id when one was actually found.
                if bitrate is not None:
                    format_id = '%s-%d' % (proto, bitrate)
                else:
                    format_id = proto
                formats.append({
                    'url': '%s://%s' % (proto, host),
                    'app': param_group.get('app'),
                    'play_path': src,
                    'ext': 'flv',
                    'format_id': format_id,
                    'tbr': bitrate,
                })
        self._sort_formats(formats)
        return formats

    def extract_from_xml_url(self, video_id, xml_url):
        """Download the details XML at ``xml_url`` and build the info dict.

        Raises ``ExtractorError`` (``expected=True``) when the document
        carries a ``./status/statuscode`` other than ``ok``.  Calls
        ``raise_geo_restricted`` when no format could be collected and the
        document's ``.//details/geolocation`` is set to something other
        than ``none``.

        Returns a dict with ``id``, ``title``, ``description``,
        ``duration``, ``thumbnails``, ``uploader``, ``uploader_id``,
        ``upload_date`` and ``formats``.
        """
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')

        status_code = xpath_text(doc, './status/statuscode')
        if status_code and status_code != 'ok':
            if status_code == 'notVisibleAnymore':
                message = 'Video %s is not available' % video_id
            else:
                message = '%s returned error: %s' % (self.IE_NAME, status_code)
            raise ExtractorError(message, expected=True)

        title = xpath_text(doc, './/information/title', 'title', True)

        # ``basetype`` is expected to consist of six underscore-separated
        # fields; the named groups below are reused for codec/protocol info.
        basetype_re = r'''(?x)
            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
        '''

        seen_urls = set()
        formats = []
        for fnode in doc.findall('.//formitaeten/formitaet'):
            video_url = xpath_text(fnode, 'url')
            if not video_url or video_url in seen_urls:
                continue
            seen_urls.add(video_url)

            is_available = 'http://www.metafilegenerator' not in video_url
            geoloced = 'static_geoloced_online' in video_url
            if not is_available or geoloced:
                continue

            # A missing or malformed ``basetype`` must not abort the whole
            # extraction; the fields it would provide simply stay unset.
            format_id = fnode.get('basetype')
            format_m = re.match(basetype_re, format_id or '')
            basetype = format_m.groupdict() if format_m else {}

            ext = determine_ext(video_url, None) or basetype.get('container')

            if ext == 'meta':
                continue
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    video_url, video_id, fatal=False))
            elif ext == 'm3u8':
                # the certificates are misconfigured (see
                # https://github.com/ytdl-org/youtube-dl/issues/8665)
                if video_url.startswith('https://'):
                    continue
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    video_url, video_id, f4m_id=format_id, fatal=False))
            else:
                quality = xpath_text(fnode, './quality')
                if quality:
                    format_id = '%s-%s' % (format_id, quality) if format_id else quality

                abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
                vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)

                tbr = int_or_none(self._search_regex(
                    r'_(\d+)k', video_url, 'bitrate', None))
                # Derive the audio bitrate when only total and video are known.
                if tbr and vbr and not abr:
                    abr = tbr - vbr

                proto = basetype.get('proto')
                formats.append({
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                    'acodec': basetype.get('acodec'),
                    'vcodec': basetype.get('vcodec'),
                    'abr': abr,
                    'vbr': vbr,
                    'tbr': tbr,
                    'width': int_or_none(xpath_text(fnode, './width')),
                    'height': int_or_none(xpath_text(fnode, './height')),
                    'filesize': int_or_none(xpath_text(fnode, './filesize')),
                    'protocol': proto.lower() if proto else None,
                })

        geolocation = xpath_text(doc, './/details/geolocation')
        if not formats and geolocation and geolocation != 'none':
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        self._sort_formats(formats)

        thumbnails = []
        for node in doc.findall('.//teaserimages/teaserimage'):
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            # A key of the form "<width>x<height>" carries the image size.
            thumbnail_key = node.get('key')
            if thumbnail_key:
                m = re.match(r'^([0-9]+)x([0-9]+)$', thumbnail_key)
                if m:
                    thumbnail['width'] = int(m.group(1))
                    thumbnail['height'] = int(m.group(2))
            thumbnails.append(thumbnail)

        upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))

        return {
            'id': video_id,
            'title': title,
            'description': xpath_text(doc, './/information/detail'),
            'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
            'thumbnails': thumbnails,
            'uploader': xpath_text(doc, './/details/originChannelTitle'),
            'uploader_id': xpath_text(doc, './/details/originChannelId'),
            'upload_date': upload_date,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Resolve a Mediathek page URL to its info dict via the XML service."""
        video_id = self._match_id(url)
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
        return self.extract_from_xml_url(video_id, details_url)