]>
Commit | Line | Data |
---|---|---|
cce929ea PH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
cce929ea | 4 | from .common import InfoExtractor |
23d9ded6 PH |
5 | from ..utils import ( |
6 | determine_ext, | |
31a12140 RA |
7 | extract_attributes, |
8 | int_or_none, | |
23d9ded6 | 9 | ) |
cce929ea PH |
10 | |
11 | ||
class FranceCultureIE(InfoExtractor):
    """Extractor for audio found on franceculture.fr "emissions" pages."""

    # The last path component of the URL is captured as the ``id`` group.
    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
        'info_dict': {
            'id': 'rendez-vous-au-pays-des-geeks',
            'display_id': 'rendez-vous-au-pays-des-geeks',
            'ext': 'mp3',
            'title': 'Rendez-vous au pays des geeks',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140301',
            'timestamp': 1393642916,
            'vcodec': 'none',
        },
    }

    def _real_extract(self, url):
        """Download the page at ``url`` and build its info dict.

        The media URL, and when present the title, creation timestamp and
        duration, are read from the ``data-*`` attributes of a ``<button>``
        tag located inside one of the recognised ``<div>`` containers.
        Description, thumbnail and uploader are scraped from the page
        markup and are all optional.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Mandatory lookup: the <button> tag carrying ``data-asset-source``.
        button_tag = self._search_regex(
            r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
            webpage, 'video data')
        asset = extract_attributes(button_tag)

        media_url = asset['data-asset-source']

        # Prefer the title stored on the button; otherwise use the og:title.
        title = asset.get('data-asset-title')
        if not title:
            title = self._og_search_title(webpage)

        description = self._html_search_regex(
            r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
            webpage, 'description', default=None)
        # Non-fatal lookup: a page without a matching <figure> is still usable.
        thumbnail = self._search_regex(
            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)
        uploader = self._html_search_regex(
            r'(?s)<span class="author">(.*?)</span>',
            webpage, 'uploader', default=None)

        # The extension is derived from the lower-cased media URL.
        extension = determine_ext(media_url.lower())
        # Only mp3 assets are explicitly flagged as having no video codec.
        if extension == 'mp3':
            vcodec = 'none'
        else:
            vcodec = None

        info = dict(id=display_id, display_id=display_id, url=media_url)
        info.update({
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'ext': extension,
            'vcodec': vcodec,
            'uploader': uploader,
            'timestamp': int_or_none(asset.get('data-asset-created-date')),
            'duration': int_or_none(asset.get('data-duration')),
        })
        return info