# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    extract_attributes,
    int_or_none,
)


class FranceCultureIE(InfoExtractor):
    """Extractor for programme pages under franceculture.fr/emissions/."""

    # The last path component of the URL is captured as the id.
    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
        'info_dict': {
            'id': 'rendez-vous-au-pays-des-geeks',
            'display_id': 'rendez-vous-au-pays-des-geeks',
            'ext': 'mp3',
            'title': 'Rendez-vous au pays des geeks',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140301',
            'timestamp': 1393700400,
            'vcodec': 'none',
        }
    }, {
        # no thumbnail
        'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single franceculture.fr programme page.

        The page is downloaded and the media metadata is read from the
        ``data-*`` attributes of a ``<button>`` tag.

        Returns a dict with the keys ``id``, ``display_id``, ``url``,
        ``title``, ``description``, ``thumbnail``, ``ext``, ``vcodec``,
        ``uploader``, ``timestamp`` and ``duration``.

        Raises ``KeyError`` if the matched button carries neither a usable
        ``data-url`` nor a ``data-asset-source`` attribute.
        NOTE(review): the button lookup is done without a ``default``, so the
        base-class helper presumably raises when no button is found - confirm
        against ``InfoExtractor._search_regex``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # Locate the first <button> exposing data-url / data-asset-source
        # that follows either the closing </h1> or one of the known heading
        # <div> containers, then parse its attributes into a dict.
        video_data = extract_attributes(self._search_regex(
            r'''(?sx)
                (?:
                    </h1>|
                    <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
                ).*?
                (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
            ''',
            webpage, 'video data'))

        # data-url is preferred; data-asset-source is the mandatory fallback.
        video_url = video_data.get('data-url') or video_data['data-asset-source']
        # Title: button attributes first, then the page's OpenGraph title.
        title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)

        # The remaining lookups are passed default=None.
        # NOTE(review): presumably this makes them optional (see the
        # "no thumbnail" test case above) - confirm in the base class.
        description = self._html_search_regex(
            r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
            webpage, 'description', default=None)
        thumbnail = self._search_regex(
            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
            webpage, 'thumbnail', default=None)
        uploader = self._html_search_regex(
            r'(?s)<span class="author">(.*?)</span>',
            webpage, 'uploader', default=None)
        # The URL is lower-cased first so the 'mp3' comparison below is
        # case-insensitive with respect to the URL's extension.
        ext = determine_ext(video_url.lower())

        return {
            'id': display_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'ext': ext,
            # Only mp3 downloads are reported with vcodec 'none'.
            'vcodec': 'none' if ext == 'mp3' else None,
            'uploader': uploader,
            # data-start-time wins; a missing, non-integer or zero value
            # falls through to data-asset-created-date.
            'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
            'duration': int_or_none(video_data.get('data-duration')),
        }