]>
jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/vier.py
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
10 class VierIE ( InfoExtractor
):
12 IE_DESC
= 'vier.be and vijf.be'
13 _VALID_URL
= r
'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
15 'url' : 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129' ,
18 'display_id' : 'het-wordt-warm-de-moestuin' ,
20 'title' : 'Het wordt warm in De Moestuin' ,
21 'description' : 'De vele uren werk eisen hun tol. Wim droomt van assistentie...' ,
25 'skip_download' : True ,
28 'url' : 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614' ,
31 'display_id' : 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas' ,
33 'title' : 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma \' s' ,
34 'description' : 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma \' s voorgeschoteld en ze MOETEN een keuze maken.' ,
38 'skip_download' : True ,
41 'url' : 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen' ,
42 'only_matching' : True ,
44 'url' : 'http://www.vier.be/video/v3/embed/16129' ,
45 'only_matching' : True ,
48 def _real_extract ( self
, url
):
49 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
50 embed_id
= mobj
. group ( 'embed_id' )
51 display_id
= mobj
. group ( 'display_id' ) or embed_id
52 site
= mobj
. group ( 'site' )
54 webpage
= self
._ download
_ webpage
( url
, display_id
)
56 video_id
= self
._ search
_ regex
(
57 [ r
'data-nid="(\d+)"' , r
'"nid"\s*:\s*"(\d+)"' ],
59 application
= self
._ search
_ regex
(
60 [ r
'data-application="([^"]+)"' , r
'"application"\s*:\s*"([^"]+)"' ],
61 webpage
, 'application' , default
= site
+ '_vod' )
62 filename
= self
._ search
_ regex
(
63 [ r
'data-filename="([^"]+)"' , r
'"filename"\s*:\s*"([^"]+)"' ],
66 playlist_url
= 'http://vod.streamcloud.be/ %s /_definst_/mp4: %s .mp4/playlist.m3u8' % ( application
, filename
)
67 formats
= self
._ extract
_ wowza
_ formats
( playlist_url
, display_id
, skip_protocols
=[ 'dash' ])
68 self
._ sort
_ formats
( formats
)
70 title
= self
._ og
_ search
_ title
( webpage
, default
= display_id
)
71 description
= self
._ og
_ search
_ description
( webpage
, default
= None )
72 thumbnail
= self
._ og
_ search
_ thumbnail
( webpage
, default
= None )
76 'display_id' : display_id
,
78 'description' : description
,
79 'thumbnail' : thumbnail
,
84 class VierVideosIE ( InfoExtractor
):
85 IE_NAME
= 'vier:videos'
86 _VALID_URL
= r
'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
88 'url' : 'http://www.vier.be/demoestuin/videos' ,
92 'playlist_mincount' : 153 ,
94 'url' : 'http://www.vijf.be/temptationisland/videos' ,
96 'id' : 'temptationisland' ,
98 'playlist_mincount' : 159 ,
100 'url' : 'http://www.vier.be/demoestuin/videos?page=6' ,
102 'id' : 'demoestuin-page6' ,
104 'playlist_mincount' : 20 ,
106 'url' : 'http://www.vier.be/demoestuin/videos?page=7' ,
108 'id' : 'demoestuin-page7' ,
110 'playlist_mincount' : 13 ,
113 def _real_extract ( self
, url
):
114 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
115 program
= mobj
. group ( 'program' )
116 site
= mobj
. group ( 'site' )
118 page_id
= mobj
. group ( 'page' )
120 page_id
= int ( page_id
)
122 playlist_id
= ' %s- page %d ' % ( program
, page_id
)
125 playlist_id
= program
128 for current_page_id
in itertools
. count ( start_page
):
129 current_page
= self
._ download
_ webpage
(
130 'http://www. %s .be/ %s /videos?page= %d ' % ( site
, program
, current_page_id
),
132 'Downloading page %d ' % ( current_page_id
+ 1 ))
134 self
. url_result ( 'http://www.' + site
+ '.be' + video_url
, 'Vier' )
135 for video_url
in re
. findall (
136 r
'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">' , current_page
)]
137 entries
. extend ( page_entries
)
138 if page_id
or '>Meer<' not in current_page
:
141 return self
. playlist_result ( entries
, playlist_id
)