jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/radiofrance.py
Merge pull request #8061 from dstftw/introduce-chapter-and-series-fields
[yt-dlp.git] / youtube_dl / extractor / radiofrance.py
CommitLineData
0e2a436d 1# coding: utf-8
51fb2e98
PH
2from __future__ import unicode_literals
3
0e2a436d
PH
4import re
5
6from .common import InfoExtractor
0e2a436d
PH
7
8
class RadioFranceIE(InfoExtractor):
    # Extractor for "radiovisions" pages hosted on maison.radiofrance.fr.
    # The named group ``id`` captures everything after /radiovisions/ up to
    # (but not including) the first '?' or '#'.
    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = 'radiofrance'

    # Sample page together with the metadata expected from it.
    _TEST = {
        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
        'info_dict': {
            'id': 'one-one',
            'ext': 'ogg',
            "title": "One to one",
            "description": "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            "uploader": "Thomas Hercouët",
        },
    }

    def _real_extract(self, url):
        """Scrape title, description, uploader and audio formats from *url*.

        The page is downloaded once and every field is pulled out of its
        HTML with regular expressions.  The description and uploader
        lookups are requested with ``fatal=False``; the title and the
        audio source list are requested without it.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``uploader``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, 'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
            webpage, 'uploader', fatal=False)

        # The player element carries its sources in a single data-source
        # attribute, matched below as a run of  name: 'url'  pairs.
        sources = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, 'audio URLs')
        source_pairs = re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", sources)

        formats = []
        for position, (format_id, audio_url) in enumerate(source_pairs):
            formats.append({
                'format_id': format_id,
                'url': audio_url,
                # NOTE(review): presumably flags the entry as audio-only;
                # confirm against the downloader's format handling.
                'vcodec': 'none',
                # Position within data-source is used as the preference.
                'preference': position,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
        }