]>
Commit | Line | Data |
---|---|---|
9eb4f404 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
f58487b3 TV |
3 | |
4 | import re | |
cc1ac110 | 5 | import itertools |
f58487b3 | 6 | |
9eb4f404 S |
7 | from .common import InfoExtractor |
8 | ||
9 | ||
class VierIE(InfoExtractor):
    """Extractor for a single video hosted on vier.be or vijf.be.

    Matches regular ``/<program>/videos/<display_id>[/<id>]`` pages as well
    as ``/video/v3/embed/<id>`` embed URLs.
    """

    IE_NAME = 'vier'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'info_dict': {
            'id': '16129',
            'display_id': 'het-wordt-warm-de-moestuin',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
        'info_dict': {
            'id': '2561614',
            'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
            'ext': 'mp4',
            'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
            'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
        'only_matching': True,
    }, {
        'url': 'http://www.vier.be/video/v3/embed/16129',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page and build the info dict for *url*.

        The numeric id, the streaming application name and the media
        filename are scraped from the page markup; the application falls
        back to ``<site>_vod`` when the page does not provide one.
        """
        url_match = re.match(self._VALID_URL, url)
        site, slug, embed_id = url_match.group('site', 'display_id', 'embed_id')
        # Embed URLs carry no slug, so the embed id doubles as display id.
        display_id = slug or embed_id

        webpage = self._download_webpage(url, display_id)

        # Each value may appear either as an HTML data attribute or as a
        # JSON property, hence two patterns per lookup.
        nid_patterns = [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"']
        video_id = self._search_regex(nid_patterns, webpage, 'video id')

        application_patterns = [
            r'data-application="([^"]+)"',
            r'"application"\s*:\s*"([^"]+)"',
        ]
        application = self._search_regex(
            application_patterns, webpage, 'application',
            default=site + '_vod')

        filename_patterns = [
            r'data-filename="([^"]+)"',
            r'"filename"\s*:\s*"([^"]+)"',
        ]
        filename = self._search_regex(filename_patterns, webpage, 'filename')

        playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
        formats = self._extract_wowza_formats(
            playlist_url, display_id, skip_protocols=['dash'])
        self._sort_formats(formats)

        # Open Graph metadata is optional; the title falls back to the
        # display id, the other two fields to None.
        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(webpage, default=display_id),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
        }
9eb4f404 S |
81 | |
82 | ||
class VierVideosIE(InfoExtractor):
    """Playlist extractor for a program's video listing on vier.be / vijf.be.

    Without a ``page`` query parameter every listing page is crawled,
    starting at page 0. With ``?page=N`` only that single page is
    extracted and the playlist id becomes ``<program>-page<N>``.
    """

    IE_NAME = 'vier:videos'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin',
        },
        'playlist_mincount': 153,
    }, {
        'url': 'http://www.vijf.be/temptationisland/videos',
        'info_dict': {
            'id': 'temptationisland',
        },
        'playlist_mincount': 159,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin-page6',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=7',
        'info_dict': {
            'id': 'demoestuin-page7',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Collect the video page URLs of a program listing as a playlist.

        Returns a playlist result whose entries are ``url_result`` items
        delegated to the 'Vier' extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        program = mobj.group('program')
        site = mobj.group('site')

        page_param = mobj.group('page')
        # Test against None rather than truthiness: an explicit ``page=0``
        # converts to the integer 0, which is falsy, and would otherwise be
        # mistaken for "no page requested" and crawl every page.
        single_page = page_param is not None
        if single_page:
            start_page = int(page_param)
            playlist_id = '%s-page%d' % (program, start_page)
        else:
            start_page = 0
            playlist_id = program

        entries = []
        for current_page_id in itertools.count(start_page):
            current_page = self._download_webpage(
                'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
                program,
                # Page indices are zero-based in the URL; show them 1-based.
                'Downloading page %d' % (current_page_id + 1))
            page_entries = [
                self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
                for video_url in re.findall(
                    r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
            entries.extend(page_entries)
            # Stop after the one requested page, or once the listing no
            # longer contains the '>Meer<' marker (presumably the "more"
            # pagination link - confirm against the live markup).
            if single_page or '>Meer<' not in current_page:
                break

        return self.playlist_result(entries, playlist_id)