]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/vier.py
[vier] Add new extractor
[yt-dlp.git] / youtube_dl / extractor / vier.py
CommitLineData
f58487b3
TV
1from .common import InfoExtractor
2from ..utils import escape_url
3
4import re
5
class VierIE(InfoExtractor):
    """Extractor for single video pages on www.vier.be.

    The video page is downloaded, its Open Graph title/description are read
    from the ``<meta>`` tags, and the ``"filename"`` value found in the page
    is turned into an HLS (m3u8) playlist URL on vod.streamcloud.be.
    """

    # Dots in the host name are escaped so that only the literal host matches,
    # and the id must contain at least one digit so it can never be empty.
    # The program group is kept permissive on purpose: playlist entries may
    # carry a doubled slash right after the host.
    _VALID_URL = r'(?:http://)?www\.vier\.be/(?P<program>.*)/videos/(.+?)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'md5': 'bf48f4eb998cbde44ecd02fc42c51149',
        'info_dict': {
            'id': '16129',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, description, formats) for *url*.

        *url* is expected to match ``_VALID_URL``; the numeric id is taken
        from its last path component.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)" />',
            webpage, u'title')
        # The description is optional metadata: a page without it should
        # still be extractable, so a miss only warns instead of aborting.
        description = self._html_search_regex(
            r'<meta property="og:description" content="(.+?)" />',
            webpage, u'description', fatal=False)
        vod_id = self._html_search_regex(
            r'"filename" : "(.+?)"', webpage, u'playlist URL')

        # Kept in its own name so the page URL in `url` is not shadowed.
        playlist_url = escape_url(
            "http://vod.streamcloud.be/vier_vod/mp4:_definst_/" + vod_id + ".mp4/playlist.m3u8")

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': self._extract_m3u8_formats(playlist_url, video_id, 'mp4'),
        }
38
class VierVideosIE(InfoExtractor):
    """Extractor for the per-program video listing pages on www.vier.be.

    One listing page (optionally selected with ``?page=N``) is downloaded and
    every ``<h3><a href="...">`` link on it becomes a playlist entry that is
    handed over to the ``Vier`` extractor.
    """

    # Dots in the host name are escaped so that only the literal host matches.
    _VALID_URL = r'http://www\.vier\.be/(?P<program>.*)/videos(\?page=(?P<page>\d*))?$'
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin page(0)',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin page(6)',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Return a playlist result listing the videos linked from *url*."""
        mobj = re.match(self._VALID_URL, url)

        program = mobj.group('program')
        # The group is None when there is no "?page=" part and '' when the
        # number is missing; both are reported as page 0.
        page = mobj.group('page') or 0

        videos_id = program + " page(" + str(page) + ")"
        videos_page = self._download_webpage(
            url, videos_id, note='Retrieving videos page')

        # Only the href is needed; the link text is matched but not captured.
        video_paths = re.findall(
            r'<h3><a href="(.+?)">.+?</a></h3>', videos_page)

        return {
            '_type': 'playlist',
            'id': videos_id,
            'entries': [{
                '_type': 'url',
                # Strip leading slashes so a root-relative href does not
                # produce "http://www.vier.be//...".
                'url': "http://www.vier.be/" + path.lstrip('/'),
                'ie_key': 'Vier',
            } for path in video_paths],
        }