]>
Commit | Line | Data |
---|---|---|
9eb4f404 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
f58487b3 TV |
3 | |
4 | import re | |
cc1ac110 | 5 | import itertools |
f58487b3 | 6 | |
9eb4f404 S |
7 | from .common import InfoExtractor |
8 | ||
9 | ||
class VierIE(InfoExtractor):
    """Extractor for single vier.be video pages and v3 embed URLs."""

    IE_NAME = 'vier'
    # Matches either "<program>/videos/<display_id>[/<id>]" or
    # "video/v3/embed/<embed_id>".
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'info_dict': {
            'id': '16129',
            'display_id': 'het-wordt-warm-de-moestuin',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
        'only_matching': True,
    }, {
        'url': 'http://www.vier.be/video/v3/embed/16129',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page or embed at *url*.

        The video id, streaming application and filename are scraped from
        the page (either as data-* attributes or as JSON-style key/value
        pairs) and combined into an m3u8 playlist URL.
        """
        url_match = re.match(self._VALID_URL, url)
        embed_id = url_match.group('embed_id')
        # Embed URLs carry no slug, so the embed id doubles as display id.
        display_id = url_match.group('display_id') or embed_id

        webpage = self._download_webpage(url, display_id)

        # Each value may appear in either of two markups; try both in order.
        nid_patterns = [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"']
        application_patterns = [
            r'data-application="([^"]+)"',
            r'"application"\s*:\s*"([^"]+)"',
        ]
        filename_patterns = [
            r'data-filename="([^"]+)"',
            r'"filename"\s*:\s*"([^"]+)"',
        ]

        video_id = self._search_regex(nid_patterns, webpage, 'video id')
        # The application is optional on the page; 'vier_vod' is the fallback.
        application = self._search_regex(
            application_patterns, webpage, 'application', default='vier_vod')
        filename = self._search_regex(filename_patterns, webpage, 'filename')

        playlist_url = (
            'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8'
            % (application, filename))
        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'display_id': display_id,
        }
        # Open Graph metadata is best-effort: the title falls back to the
        # display id, the other fields to None.
        info['title'] = self._og_search_title(webpage, default=display_id)
        info['description'] = self._og_search_description(webpage, default=None)
        info['thumbnail'] = self._og_search_thumbnail(webpage, default=None)
        info['formats'] = formats
        return info
9eb4f404 S |
67 | |
68 | ||
class VierVideosIE(InfoExtractor):
    """Playlist extractor for a vier.be program's paginated video listing."""

    IE_NAME = 'vier:videos'
    # Matches "<program>/videos" optionally followed by a query string
    # containing "page=<n>".
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin',
        },
        'playlist_mincount': 153,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin-page6',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=7',
        'info_dict': {
            'id': 'demoestuin-page7',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Return a playlist of the videos listed for a program.

        When the URL names an explicit ``page`` only that page is scraped and
        the playlist id is ``<program>-page<n>``; otherwise every page from
        0 onwards is scraped until one lacks the '>Meer<' marker, and the
        playlist id is the program name.
        """
        mobj = re.match(self._VALID_URL, url)
        program = mobj.group('program')

        page = mobj.group('page')
        # Test the regex group against None rather than relying on the
        # truthiness of the converted integer: page=0 is a valid explicit
        # request and must not be mistaken for "no page given".
        single_page = page is not None
        if single_page:
            start_page = int(page)
            playlist_id = '%s-page%d' % (program, start_page)
        else:
            start_page = 0
            playlist_id = program

        entries = []
        for current_page_id in itertools.count(start_page):
            current_page = self._download_webpage(
                'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
                program,
                'Downloading page %d' % (current_page_id + 1))
            entries.extend(
                self.url_result('http://www.vier.be' + video_url, 'Vier')
                for video_url in re.findall(
                    r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page))
            # Stop after one page when a page was requested explicitly, or
            # when the listing no longer contains the '>Meer<' marker.
            if single_page or '>Meer<' not in current_page:
                break

        return self.playlist_result(entries, playlist_id)