]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/rtlnl.py
Fix "invalid escape sequences" error on Python 3.6
[yt-dlp.git] / youtube_dl / extractor / rtlnl.py
CommitLineData
59b8ab58 1# coding: utf-8
6493f5d7
JMF
2from __future__ import unicode_literals
3
6493f5d7 4from .common import InfoExtractor
59b8ab58
PH
5from ..utils import (
6 int_or_none,
7 parse_duration,
8)
6493f5d7
JMF
9
10
59b8ab58
PH
class RtlNlIE(InfoExtractor):
    """Information extractor for videos hosted on rtl.nl and rtlxl.nl.

    The video UUID is taken from the URL (see ``_VALID_URL``) and used to
    query the ``s4m/vfd`` JSON endpoint, from which HLS formats, guessed
    progressive MP4 formats, thumbnails and basic metadata are built.
    """

    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    # Verbose regex: either an rtlxl.nl "#!/<slug>/<uuid>" URL or an rtl.nl
    # embedded player URL carrying "uuid=<uuid>".
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?
        (?:
            rtlxl\.nl/[^\#]*\#!/[^/]+/|
            rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``timestamp``, ``description``, ``duration`` and ``thumbnails``.

        ``material[0]``, ``abstracts[0]['name']`` and ``videopath`` are
        treated as mandatory (a missing one raises ``KeyError`` or
        ``IndexError``); every other field is optional and degrades to
        ``None`` or an empty list.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        # "or {}" also covers a "meta" key that is present but null.
        meta = info.get('meta') or {}

        # m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
        # To workaround this previously adaptive -> flash trick was used to obtain
        # unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
        # and bypass georestrictions as well.
        # Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
        # unusable albeit can be fixed by simple string replacement (see
        # https://github.com/rg3/youtube-dl/pull/6337)
        # Since recent ffmpeg and avconv handle encrypted streams just fine encrypted
        # streams are used now.
        videopath = material['videopath']
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)

        # Progressive URLs are derived from whatever follows "/adaptive/" in
        # the video path, minus its last five characters (presumably the
        # ".m3u8" suffix - confirm against a live response). When the marker
        # is absent no progressive URL can be built, so those formats are
        # skipped instead of failing the whole extraction.
        path_parts = videopath.split('/adaptive/')
        video_urlpart = path_parts[1][:-5] if len(path_parts) > 1 else None

        PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'

        # (format id, width, height) of the progressive renditions to try.
        PG_FORMATS = (
            ('a2t', 512, 288),
            ('a3t', 704, 400),
            ('nettv', 1280, 720),
        )

        def pg_format(format_id, width, height):
            # Format dict for one progressive rendition.
            return {
                'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
                'format_id': 'pg-%s' % format_id,
                'protocol': 'http',
                'width': width,
                'height': height,
            }

        if video_urlpart:
            if not formats:
                # No hls formats to cross-check against: offer every known
                # progressive rendition.
                formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
            else:
                pg_formats = []
                for format_id, width, height in PG_FORMATS:
                    # Find hls format with the same height corresponding
                    # to progressive format and copy metadata from it.
                    hls_format = next(
                        (f for f in formats if f.get('height') == height), None)
                    if hls_format is None:
                        # Missing hls format does mean that no progressive format with
                        # such width and height exists either.
                        continue
                    # hls formats may have invalid width
                    hls_format['width'] = width
                    f_copy = hls_format.copy()
                    f_copy.update(pg_format(format_id, width, height))
                    pg_formats.append(f_copy)
                formats.extend(pg_formats)
        self._sort_formats(formats)

        thumbnails = []

        # NOTE(review): the second key really contains literal double quotes;
        # presumably this mirrors a malformed key in the API response - confirm
        # before "fixing" it.
        for p in ('poster_base_url', '"thumb_base_url"'):
            if not meta.get(p):
                continue

            thumbnails.append({
                'url': self._proto_relative_url(meta[p] + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    meta[p], 'thumbnail height', fatal=False))
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional field: a missing date must not abort extraction.
            'timestamp': int_or_none(material.get('original_date')),
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }