]>
Commit | Line | Data |
---|---|---|
201e3c99 NÉ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | import json | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import clean_html | |
9 | ||
class RTBFVideoIE(InfoExtractor):
    """Extractor for video pages under https://www.rtbf.be/video/.

    The video page embeds an iframe; the iframe markup carries a
    ``data-video`` attribute holding HTML-escaped JSON from which the
    download URL and the duration are read.
    """

    # The dots of the host name are escaped so they only match a literal '.'.
    # ``id`` must be a complete query parameter name (first parameter or
    # preceded by '&'), so e.g. ``vid=7`` is not mistaken for the video id.
    _VALID_URL = r'https?://www\.rtbf\.be/video/(?P<title>[^?]+)\?(?:.*?&)?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '799f334ddf2c0a582ba80c44655be570',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'duration': 3099,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, url, duration) for ``url``.

        ``url`` is expected to match ``_VALID_URL``; the numeric ``id``
        group becomes the video id.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The URL slug is passed as the fourth positional argument, exactly
        # as before -- presumably the fallback used when the meta tag is
        # absent (confirm against InfoExtractor._html_search_regex).
        title = self._html_search_regex(
            r'<meta property="og:description" content="([^"]*)"',
            webpage, 'title', mobj.group('title'))

        iframe_url = self._html_search_regex(
            r'<iframe [^>]*src="([^"]+)"', webpage, 'iframe')
        iframe = self._download_webpage(iframe_url, video_id)

        # Read the attribute value with a regex instead of hand-computed
        # string offsets: the offsets silently produced a garbage slice when
        # 'data-video' was missing or was not directly followed by another
        # 'data-' attribute.
        # NOTE(review): assumes _html_search_regex accepts a list of patterns,
        # unescapes HTML entities in its result (replacing the explicit
        # clean_html() call used previously) and raises when nothing matches
        # -- confirm against InfoExtractor.
        video_json = self._html_search_regex(
            [r'data-video="([^"]+)"', r"data-video='([^']+)'"],
            iframe, 'video data')
        video_data = json.loads(video_json)['data']

        return {
            'id': video_id,
            'title': title,
            'url': video_data['downloadUrl'],
            'duration': video_data['duration'],
        }