]>
Commit | Line | Data |
---|---|---|
9eb4f404 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
f58487b3 TV |
3 | |
4 | import re | |
5 | ||
9eb4f404 S |
6 | from .common import InfoExtractor |
7 | ||
8 | ||
class VierIE(InfoExtractor):
    """Extractor for a single vier.be video.

    Matches regular video pages (``/<program>/videos/<slug>[/<id>]``) as
    well as embed URLs (``/video/v3/embed/<id>``).
    """

    IE_NAME = 'vier'
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'info_dict': {
            'id': '16129',
            'display_id': 'het-wordt-warm-de-moestuin',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
        'only_matching': True,
    }, {
        'url': 'http://www.vier.be/video/v3/embed/16129',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page and build the info dict for it.

        The numeric id, the streaming application name and the file name
        are scraped from the page (either ``data-*`` attributes or the
        equivalent JSON-style ``"key": "value"`` pairs) and combined into
        an HLS playlist URL.
        """
        url_match = re.match(self._VALID_URL, url)
        # Embed URLs have no slug; fall back to the embed id for display.
        display_id = url_match.group('display_id') or url_match.group('embed_id')

        page = self._download_webpage(url, display_id)

        nid = self._search_regex(
            [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
            page, 'video id')
        # The application name is optional on the page; 'vier_vod' is used
        # when neither pattern matches.
        app_name = self._search_regex(
            [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
            page, 'application', default='vier_vod')
        file_name = self._search_regex(
            [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
            page, 'filename')

        m3u8_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (app_name, file_name)
        formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4')

        return {
            'id': nid,
            'display_id': display_id,
            # OpenGraph metadata is best-effort: the title falls back to the
            # display id, description and thumbnail fall back to None.
            'title': self._og_search_title(page, default=display_id),
            'description': self._og_search_description(page, default=None),
            'thumbnail': self._og_search_thumbnail(page, default=None),
            'formats': formats,
        }
9eb4f404 S |
65 | |
66 | ||
class VierVideosIE(InfoExtractor):
    """Playlist extractor for a vier.be program's video listing.

    Matches ``/<program>/videos`` (all listing pages) and
    ``/<program>/videos?page=N`` (one listing page only).
    """

    IE_NAME = 'vier:videos'
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin',
        },
        'playlist_mincount': 153,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin-page6',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=7',
        'info_dict': {
            'id': 'demoestuin-page7',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Collect the video links of one or all listing pages as a playlist.

        With an explicit ``page`` parameter only that page is scanned and
        the playlist id becomes ``<program>-page<N>``.  Without it, pages 0
        through the number found in the "laatste" (last) pager link are
        scanned and the playlist id is the program name.
        """
        url_match = re.match(self._VALID_URL, url)
        program = url_match.group('program')

        webpage = self._download_webpage(url, program)

        page_id = url_match.group('page')
        if not page_id:
            # No page requested: walk every listing page.  The pager's
            # "laatste" link carries the highest page number; when it is
            # absent only page 0 is visited.
            first_page = 0
            end_page = 1 + int(self._search_regex(
                r'videos\?page=(\d+)">laatste</a>',
                webpage, 'last page', default=0))
            playlist_id = program
        else:
            page_id = int(page_id)
            first_page = page_id
            end_page = page_id + 1
            playlist_id = '%s-page%d' % (program, page_id)

        entries = []
        for page_number in range(first_page, end_page):
            if page_number == page_id:
                # The requested page was already fetched above.
                listing = webpage
            else:
                # NOTE: when no page was requested page_id is None, so this
                # branch is taken for every page, including page 0.
                listing = self._download_webpage(
                    'http://www.vier.be/%s/videos?page=%d' % (program, page_number),
                    program,
                    'Downloading page %d' % (page_number + 1))
            video_paths = re.findall(
                r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', listing)
            for video_path in video_paths:
                entries.append(
                    self.url_result('http://www.vier.be' + video_path, 'Vier'))

        return self.playlist_result(entries, playlist_id)