]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/radiofrance.py
[extractor/youtube] Bring back `_extract_chapters_from_description`
[yt-dlp.git] / yt_dlp / extractor / radiofrance.py
CommitLineData
0e2a436d
PH
1import re
2
3from .common import InfoExtractor
0e2a436d
PH
4
5
class RadioFranceIE(InfoExtractor):
    """Extractor for "radiovisions" pages hosted on maison.radiofrance.fr.

    The page slug captured by ``_VALID_URL`` is used as the media id; all
    metadata and audio URLs are scraped from the page HTML with regexes.
    """

    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = 'radiofrance'

    _TEST = {
        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
        'info_dict': {
            'id': 'one-one',
            'ext': 'ogg',
            'title': 'One to one',
            'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            'uploader': 'Thomas Hercouët',
        },
    }

    def _real_extract(self, url):
        """Scrape title, description, uploader and audio formats from *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``uploader``.
        """
        video_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
        # description and uploader are looked up with fatal=False
        # (presumably so a missing field does not abort extraction --
        # confirm against InfoExtractor._html_search_regex).
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, 'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
            webpage, 'uploader', fatal=False)

        # The player element carries every audio source in a single
        # ``data-source`` attribute as ``name: 'url'`` pairs.
        sources = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, 'audio URLs')
        source_pairs = re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", sources)

        formats = []
        for rank, (format_id, format_url) in enumerate(source_pairs):
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'vcodec': 'none',
                # Position within the attribute is used as the quality value.
                'quality': rank,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
        }