]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/rtlnl.py
[formula1] Relax _VALID_URL (Closes #10283)
[yt-dlp.git] / youtube_dl / extractor / rtlnl.py
CommitLineData
59b8ab58 1# coding: utf-8
6493f5d7
JMF
2from __future__ import unicode_literals
3
6493f5d7 4from .common import InfoExtractor
59b8ab58
PH
5from ..utils import (
6 int_or_none,
7 parse_duration,
8)
6493f5d7
JMF
9
10
59b8ab58
PH
class RtlNlIE(InfoExtractor):
    """Information extractor for videos on rtl.nl and rtlxl.nl.

    The video UUID is taken from the page URL and used to query the RTL
    JSON endpoint, which yields the title, synopsis, HLS manifest path and
    thumbnail base URLs.  Progressive (plain HTTP) mp4 variants are derived
    from the HLS manifest path.
    """

    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?
        (?:
            rtlxl\.nl/\#!/[^/]+/|
            rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            # Raw string: '\.' is not a valid escape in a normal string literal.
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            # Raw string: '\.' is not a valid escape in a normal string literal.
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video identified by ``url``.

        Downloads the JSON description for the UUID matched by
        ``_VALID_URL``, collects HLS formats from the manifest and adds
        progressive mp4 formats derived from the manifest path.

        Returns a dict with ``id``, ``title``, ``formats``, ``timestamp``,
        ``description``, ``duration`` and ``thumbnails``.

        Raises ``KeyError``/``IndexError`` when the mandatory ``material``,
        ``abstracts`` or ``videopath`` fields are absent from the response.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        # ``or {}`` also covers an explicit JSON null, which a plain
        # ``.get('meta', {})`` would pass through as None.
        meta = info.get('meta') or {}

        # m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
        # To work around this, an adaptive -> flash trick was previously used to obtain
        # unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
        # and bypass georestrictions as well.
        # Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
        # unusable, although they can be fixed by simple string replacement (see
        # https://github.com/rg3/youtube-dl/pull/6337).
        # Since recent ffmpeg and avconv handle encrypted streams just fine, encrypted
        # streams are used now.
        videopath = material['videopath']
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)

        # The progressive URL reuses the part of the manifest path that
        # follows '/adaptive/', minus its 5-character suffix.  When the
        # marker is missing no progressive URL can be built, so those
        # formats are skipped instead of failing with an IndexError.
        path_parts = videopath.split('/adaptive/')
        video_urlpart = path_parts[1][:-5] if len(path_parts) > 1 else None
        PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'

        # (format id, width, height) of the known progressive variants.
        PG_FORMATS = (
            ('a2t', 512, 288),
            ('a3t', 704, 400),
            ('nettv', 1280, 720),
        )

        def pg_format(format_id, width, height):
            return {
                'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
                'format_id': 'pg-%s' % format_id,
                'protocol': 'http',
                'width': width,
                'height': height,
            }

        if video_urlpart is not None:
            if not formats:
                # No HLS information at all: offer every known variant.
                formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
            else:
                pg_formats = []
                for format_id, width, height in PG_FORMATS:
                    # Find the hls format with the height corresponding to
                    # the progressive format and copy metadata from it.
                    hls_format = next(
                        (f for f in formats if f.get('height') == height),
                        None)
                    if hls_format is None:
                        # A missing hls format means that no progressive
                        # format with such dimensions exists either.
                        continue
                    # hls formats may have invalid width
                    hls_format['width'] = width
                    progressive = hls_format.copy()
                    progressive.update(pg_format(format_id, width, height))
                    pg_formats.append(progressive)
                formats.extend(pg_formats)
        self._sort_formats(formats)

        thumbnails = []

        # The original code only looked up the key with literal double
        # quotes embedded, which can never match a normally named field.
        # NOTE(review): the quoted spelling is kept in case the API really
        # emits it - confirm against a live response.
        for p in ('poster_base_url', 'thumb_base_url', '"thumb_base_url"'):
            base_url = meta.get(p)
            if not base_url:
                continue

            thumbnails.append({
                'url': self._proto_relative_url(base_url + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', base_url, 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    base_url, 'thumbnail height', fatal=False))
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional metadata: a missing date must not abort extraction.
            'timestamp': int_or_none(material.get('original_date')),
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }