]>
Commit | Line | Data |
---|---|---|
911344e5 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import float_or_none | |
8 | ||
9 | ||
class VRTIE(InfoExtractor):
    """Extractor for video pages under /cm/ on deredactie.be, sporza.be and cobra.be."""

    # The last non-empty path segment is captured as the ``id`` group;
    # any number of trailing slashes is tolerated.
    _VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
    _TESTS = [
        # deredactie.be
        {
            'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
            'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
            'info_dict': {
                'id': '2129880',
                'ext': 'flv',
                'title': 'Het journaal L - 25/10/14',
                'description': None,
                'timestamp': 1414271750.949,
                'upload_date': '20141025',
                'duration': 929,
            }
        },
        # sporza.be
        {
            'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
            'md5': '11f53088da9bf8e7cfc42456697953ff',
            'info_dict': {
                'id': '2124639',
                'ext': 'flv',
                'title': 'Bekijk Extra Time van 20 oktober',
                'description': 'md5:83ac5415a4f1816c6a93f8138aef2426',
                'timestamp': 1413835980.560,
                'upload_date': '20141020',
                'duration': 3238,
            }
        },
        # cobra.be
        {
            'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
            'md5': '78a2b060a5083c4f055449a72477409d',
            'info_dict': {
                'id': '2126050',
                'ext': 'flv',
                'title': 'Bret Easton Ellis in Café Corsari',
                'description': 'md5:f699986e823f32fd6036c1855a724ee9',
                'timestamp': 1413967500.494,
                'upload_date': '20141022',
                'duration': 661,
            }
        },
    ]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict from its markup.

        Formats are collected from the ``data-video-iphone-*`` attributes
        (m3u8) and from ``data-video-src`` (f4m manifest); title, description
        and thumbnail come from the page's Open Graph helpers.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``timestamp``, ``duration`` and ``formats``.
        """
        # Keep the URL slug in its own variable so it survives the page lookup.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The attribute value has the form "<id>_<suffix>"; only the part
        # before the last underscore is used.  The lookup is non-fatal, so it
        # may yield nothing -- in that case fall back to the URL slug instead
        # of handing a None id to the format extractors and the result dict.
        video_id = self._search_regex(
            r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id',
            fatal=False) or display_id

        formats = []

        # m3u8 playlist: server and path are two adjacent attributes.
        iphone_mobj = re.search(
            r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
            webpage)
        if iphone_mobj:
            formats.extend(self._extract_m3u8_formats(
                '%s/%s' % (iphone_mobj.group('server'), iphone_mobj.group('path')),
                video_id, 'mp4'))

        # f4m manifest, located relative to the data-video-src value.
        src_mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage)
        if src_mobj:
            formats.extend(self._extract_f4m_formats(
                '%s/manifest.f4m' % src_mobj.group('src'), video_id))

        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(webpage)
        # NOTE(review): both attributes are integer strings passed through
        # float_or_none with 1000 as second argument -- presumably a
        # milliseconds-to-seconds scale; confirm against the helper.
        timestamp = float_or_none(self._search_regex(
            r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000)
        duration = float_or_none(self._search_regex(
            r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }