]>
Commit | Line | Data |
---|---|---|
9eb4f404 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
f58487b3 TV |
3 | |
4 | import re | |
cc1ac110 | 5 | import itertools |
f58487b3 | 6 | |
9eb4f404 S |
7 | from .common import InfoExtractor |
8 | ||
9 | ||
class VierIE(InfoExtractor):
    """Extractor for individual videos on vier.be and vijf.be.

    Matches regular video pages (``/<program>/videos/<display_id>[/<id>]``)
    as well as embed pages (``/video/v3/embed/<embed_id>``).
    """

    IE_NAME = 'vier'
    IE_DESC = 'vier.be and vijf.be'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'info_dict': {
            'id': '16129',
            'display_id': 'het-wordt-warm-de-moestuin',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
        'info_dict': {
            'id': '2561614',
            'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
            'ext': 'mp4',
            'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
            'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
        'only_matching': True,
    }, {
        'url': 'http://www.vier.be/video/v3/embed/16129',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page or embed page at *url*.

        The numeric id, the streaming application and the media filename
        are read from the downloaded page (either ``data-*`` attributes or
        JSON-style ``"key": "value"`` pairs) and combined into the
        playlist URL from which the formats are extracted.
        """
        match = re.match(self._VALID_URL, url)
        site, embed_id = match.group('site', 'embed_id')
        # Embed URLs carry no slug, so the embed id doubles as display id.
        display_id = match.group('display_id') or embed_id

        webpage = self._download_webpage(url, display_id)

        nid_patterns = [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"']
        video_id = self._search_regex(nid_patterns, webpage, 'video id')

        application_patterns = [
            r'data-application="([^"]+)"',
            r'"application"\s*:\s*"([^"]+)"',
        ]
        # When the page names no application, fall back to "<site>_vod".
        application = self._search_regex(
            application_patterns, webpage, 'application',
            default=site + '_vod')

        filename_patterns = [
            r'data-filename="([^"]+)"',
            r'"filename"\s*:\s*"([^"]+)"',
        ]
        filename = self._search_regex(filename_patterns, webpage, 'filename')

        playlist_url = (
            'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8'
            % (application, filename))
        formats = self._extract_wowza_formats(
            playlist_url, display_id, skip_protocols=['dash'])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            # The title falls back to the display id; the other OpenGraph
            # fields are optional.
            'title': self._og_search_title(webpage, default=display_id),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
        }
9eb4f404 S |
82 | |
83 | ||
class VierVideosIE(InfoExtractor):
    """Playlist extractor for the per-program video listings on vier.be
    and vijf.be (``/<program>/videos``, optionally with ``?page=N``).

    Without a ``page`` query parameter all listing pages are walked,
    starting at page 0; with one, only that page is extracted.
    """

    IE_NAME = 'vier:videos'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin',
        },
        'playlist_mincount': 153,
    }, {
        'url': 'http://www.vijf.be/temptationisland/videos',
        'info_dict': {
            'id': 'temptationisland',
        },
        'playlist_mincount': 159,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin-page6',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=7',
        'info_dict': {
            'id': 'demoestuin-page7',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Build a playlist of the videos listed for the program in *url*.

        Returns a playlist result whose id is the program name, or
        ``<program>-page<N>`` when a specific page was requested. Every
        entry is a URL result delegated to the 'Vier' extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        program = mobj.group('program')
        site = mobj.group('site')

        page_id = mobj.group('page')
        # Keep the "explicit page requested" flag separate from the page
        # number: page 0 is a valid request, but the integer 0 is falsy
        # and would otherwise make the loop below crawl all later pages.
        single_page = page_id is not None
        if single_page:
            start_page = int(page_id)
            playlist_id = '%s-page%d' % (program, start_page)
        else:
            start_page = 0
            playlist_id = program

        entries = []
        for current_page_id in itertools.count(start_page):
            current_page = self._download_webpage(
                'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
                program,
                # Pages are numbered from 0 in the URL, from 1 in the log.
                'Downloading page %d' % (current_page_id + 1))
            entries.extend(
                self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
                for video_url in re.findall(
                    r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page))
            # Stop after the one requested page, or when the listing no
            # longer contains the '>Meer<' marker.
            if single_page or '>Meer<' not in current_page:
                break

        return self.playlist_result(entries, playlist_id)