]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/rtlnl.py
[rts] Add support for articles with videos on rhs (Closes #6332)
[yt-dlp.git] / youtube_dl / extractor / rtlnl.py
CommitLineData
59b8ab58 1# coding: utf-8
6493f5d7
JMF
2from __future__ import unicode_literals
3
6493f5d7 4from .common import InfoExtractor
59b8ab58
PH
5from ..utils import (
6 int_or_none,
7 parse_duration,
8)
6493f5d7
JMF
9
10
59b8ab58
PH
class RtlNlIE(InfoExtractor):
    """Extractor for rtlxl.nl video pages and rtl.nl embedded players.

    The video UUID is read from the URL and used to query the rtl.nl
    ``s4m`` metadata endpoint. Formats come from the m3u8 manifest plus,
    when the video path allows it, two progressive MP4 URLs.
    """

    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    # Verbose regex: either an rtlxl.nl hash-bang URL or an rtl.nl
    # (video_)embed.html player URL; the trailing group captures the UUID.
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?
        (?:
            rtlxl\.nl/\#!/[^/]+/|
            rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
        'md5': 'cc16baa36a6c169391f0764fa6b16654',
        'info_dict': {
            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Laat',
            'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
            'timestamp': 1408051800,
            'upload_date': '20140814',
            'duration': 576.880,
        },
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            # Raw string so that ``\.`` reaches the regex engine untouched.
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            # Raw string so that ``\.`` reaches the regex engine untouched.
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video identified by ``url``.

        Downloads the JSON metadata for the UUID matched by ``_VALID_URL``,
        derives the m3u8 and progressive format URLs from the first
        ``material`` entry and collects thumbnails from the ``meta`` section.

        Returns a dict with ``id``, ``title``, ``formats``, ``timestamp``,
        ``description``, ``duration`` and ``thumbnails``.

        Raises ``KeyError``/``IndexError`` when the response lacks the
        ``material``, ``abstracts`` or ``videopath`` entries, which are
        indexed directly.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        # ``or {}`` also covers a "meta" key that is present but null.
        meta = info.get('meta') or {}

        # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
        # NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
        # this adaptive -> flash workaround is not required in general, but it also
        # allows bypassing georestriction therefore is retained for now.
        videopath = material['videopath'].replace('/adaptive/', '/flash/')
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')

        # The progressive URLs are derived from what follows '/flash/'.
        # Skip them, instead of failing the whole extraction, when the path
        # has no such segment.
        path_parts = videopath.split('/flash/')
        if len(path_parts) > 1:
            # Drops the last five characters (presumably the '.m3u8'
            # extension -- TODO confirm against real responses).
            video_urlpart = path_parts[1][:-5]
            PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'

            formats.extend([
                {
                    'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
                    'format_id': 'pg-sd',
                },
                {
                    'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
                    'format_id': 'pg-hd',
                    'quality': 0,
                }
            ])
        self._sort_formats(formats)

        thumbnails = []

        # NOTE(review): the original only looked up '"thumb_base_url"' (with
        # literal double quotes in the key), which cannot match a normally
        # named JSON key. Both spellings are accepted until the real key
        # name is confirmed.
        for p in ('poster_base_url', 'thumb_base_url', '"thumb_base_url"'):
            base_url = meta.get(p)
            if not base_url:
                continue

            thumbnails.append({
                'url': self._proto_relative_url(base_url + uuid),
                # Dimensions are encoded in the base URL as /sz=<width>x<height>.
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', base_url, 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    base_url, 'thumbnail height', fatal=False))
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional metadata: a missing date must not abort extraction.
            'timestamp': int_or_none(material.get('original_date')),
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }