]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rtbf.py
[extractor] Standardize `_live_title`
[yt-dlp.git] / yt_dlp / extractor / rtbf.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 float_or_none,
10 int_or_none,
11 strip_or_none,
12 )
13
14
class RTBFIE(InfoExtractor):
    """Extractor for rtbf.be ``video``, ``ouftivi`` and ``auvio`` page URLs.

    The numeric media id is read from the ``id``/``videoId``/``lid`` query
    parameter; an ``lid`` parameter on an ``auvio`` URL marks a live stream.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?rtbf\.be/
        (?:
            video/[^?]+\?.*\bid=|
            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
            auvio/[^/]+\?.*\b(?P<live>l)?id=
        )(?P<id>\d+)'''
    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '8c876a1cceeb6cf31b476461ade72384',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'description': '(du 25/04/2014)',
            'duration': 3099.54,
            'upload_date': '20140425',
            'timestamp': 1398456300,
        }
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
        'only_matching': True,
    }, {
        # Live
        'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
        'only_matching': True,
    }, {
        # Audio
        'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
        'only_matching': True,
    }, {
        # With Subtitle
        'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
        'only_matching': True,
    }]
    # Not referenced by the code in this class.
    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
    # Value of the media data's 'provider' field -> extractor key handed to
    # url_result() for media hosted on another platform.
    _PROVIDERS = {
        'YOUTUBE': 'Youtube',
        'DAILYMOTION': 'Dailymotion',
        'VIMEO': 'Vimeo',
    }
    # (key in the media data's 'sources' mapping, format_id to report)
    _QUALITIES = [
        ('mobile', 'SD'),
        ('web', 'MD'),
        ('high', 'HD'),
    ]

    def _real_extract(self, url):
        """Build the info dict for an RTBF media or live page.

        Downloads the matching embed page, decodes the JSON stored in its
        ``data-media`` attribute and collects HLS, progressive HTTP, DASH and
        audio formats plus subtitles from it.

        Returns a ``url_result`` when the media data names a provider listed
        in ``_PROVIDERS``. Raises ``ExtractorError`` (expected) when the media
        data carries an ``error`` entry.
        """
        mobj = self._match_valid_url(url)
        live = mobj.group('live')
        media_id = mobj.group('id')

        embed_kind = 'direct' if live else 'media'
        embed_page = self._download_webpage(
            'https://www.rtbf.be/auvio/embed/' + embed_kind,
            media_id, query={'id': media_id})
        raw_media_data = self._html_search_regex(
            r'data-media="([^"]+)"', embed_page, 'media data')
        data = self._parse_json(raw_media_data, media_id)

        error = data.get('error')
        if error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        # Media hosted elsewhere is delegated to the matching extractor.
        provider = data.get('provider')
        if provider in self._PROVIDERS:
            return self.url_result(data['url'], self._PROVIDERS[provider])

        title = data['title']
        is_live = data.get('isLive')
        # Matches the "-<height>p." marker inside progressive download URLs.
        height_re = r'-(\d+)p\.'

        def fix_url(candidate):
            # URLs containing '/geo/drm/' get their 'rtbf-vod.' host prefix
            # replaced by 'rtbf.'; every other URL is returned unchanged.
            if '/geo/drm/' in candidate:
                return candidate.replace('//rtbf-vod.', '//rtbf.')
            return candidate

        formats = []

        m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
        if m3u8_url:
            hls_formats = self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)

        http_url = data.get('url')
        if formats and http_url and re.search(height_re, http_url):
            # Derive one progressive HTTP format per HLS format that reports a
            # height, by substituting that height into the HTTP URL.
            http_url = fix_url(http_url)
            http_variants = []
            for hls_format in formats:
                height = hls_format.get('height')
                if height:
                    variant = hls_format.copy()
                    del variant['protocol']
                    variant['format_id'] = hls_format['format_id'].replace('hls-', 'http-')
                    variant['url'] = re.sub(height_re, '-%dp.' % height, http_url)
                    http_variants.append(variant)
            formats.extend(http_variants)
        else:
            # Otherwise use the per-quality URLs listed under 'sources'.
            sources = data.get('sources') or {}
            for source_key, format_id in self._QUALITIES:
                format_url = sources.get(source_key)
                if format_url:
                    matched_height = self._search_regex(
                        height_re, format_url, 'height', default=None)
                    formats.append({
                        'format_id': format_id,
                        'url': fix_url(format_url),
                        'height': int_or_none(matched_height),
                    })

        mpd_url = data.get('urlDash')
        if mpd_url:
            # DASH is skipped for media flagged 'drm' unless the user allowed
            # unplayable formats.
            if self.get_param('allow_unplayable_formats') or not data.get('drm'):
                dash_formats = self._extract_mpd_formats(
                    mpd_url, media_id, mpd_id='dash', fatal=False)
                formats.extend(dash_formats)

        audio_url = data.get('urlAudio')
        if audio_url:
            formats.append({
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        subtitles = {}
        tracks = data.get('tracks') or {}
        for track in tracks.values():
            sub_url = track.get('url')
            if sub_url:
                # Tracks without a language are filed under 'fr'.
                lang = track.get('lang') or 'fr'
                subtitles.setdefault(lang, []).append({'url': sub_url})

        return {
            'id': media_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'thumbnail': data.get('thumbnail'),
            'duration': float_or_none(data.get('realDuration')),
            'timestamp': int_or_none(data.get('liveFrom')),
            'series': data.get('programLabel'),
            'subtitles': subtitles,
            'is_live': is_live,
        }