]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/rtlnl.py
[rtlnl] Extend _VALID_URL (#26549) (closes #25821)
[yt-dlp.git] / youtube_dl / extractor / rtlnl.py
CommitLineData
59b8ab58 1# coding: utf-8
6493f5d7
JMF
2from __future__ import unicode_literals
3
6493f5d7 4from .common import InfoExtractor
59b8ab58
PH
5from ..utils import (
6 int_or_none,
7 parse_duration,
8)
6493f5d7
JMF
9
10
59b8ab58
PH
class RtlNlIE(InfoExtractor):
    # Extractor for videos identified by a UUID on rtl.nl / rtlxl.nl pages
    # and embeds (see _VALID_URL and _TESTS for the supported URL shapes).
    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    # Verbose regex: optional www./static. host prefix, then either an
    # rtlxl.nl programme URL (hash-bang or /programma/ form) or an rtl.nl
    # embed/video URL; the trailing hex-and-dash run is captured as `id`.
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Downloads the JSON metadata document for the video's UUID, derives
        the HLS manifest URL from it and returns a dict with the keys
        ``id``, ``title``, ``formats``, ``timestamp``, ``description``,
        ``duration`` and ``thumbnails``.

        Raises KeyError/IndexError if the JSON lacks the ``material`` or
        ``abstracts`` entries, or the ``videopath`` the formats depend on.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]

        # Title is the programme name, optionally suffixed with the
        # per-item title when one is present.
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        # `or {}` (rather than a .get() default) also covers an explicit
        # null "meta" value, which would otherwise break the lookups below.
        meta = info.get('meta') or {}

        videopath = material['videopath']
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        thumbnails = []

        # NOTE(review): the second key literally contains double quotes;
        # presumably this mirrors the key as served by the API - confirm
        # before "fixing" it.
        for p in ('poster_base_url', '"thumb_base_url"'):
            base_url = meta.get(p)
            if not base_url:
                continue

            # Width and height are parsed from a "/sz=<W>x<H>" segment of
            # the base URL; both lookups are non-fatal.
            thumbnails.append({
                'url': self._proto_relative_url(base_url + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', base_url, 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    base_url, 'thumbnail height', fatal=False))
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional field: a missing or non-numeric value must not abort
            # extraction, so look it up tolerantly and coerce to int.
            'timestamp': int_or_none(material.get('original_date')),
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }