]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import month_by_name | |
6 | ||
7 | ||
class FranceInterIE(InfoExtractor):
    """Information extractor for franceinter.fr programme pages.

    Handles URLs of the form ``franceinter.fr/emissions/<path>``; the
    ``<path>`` part (everything up to a ``?`` or ``#``) is used as the id.
    """

    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'

    _TEST = {
        'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
        'md5': '9e54d7bdb6fdc02a841007f8a975c094',
        'info_dict': {
            'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
            'ext': 'mp3',
            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
            'description': 'md5:401969c5d318c061f86bda1fa359292b',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20160907',
        },
    }

    @staticmethod
    def _format_upload_date(date_str):
        """Convert a ``<day> <French month name> <year>`` string to YYYYMMDD.

        Returns None when the month name is not recognised, so that an
        impossible date (month ``00``) is never reported.
        """
        # The regex that produces date_str guarantees exactly three
        # whitespace-separated tokens: 1-2 digit day, month word, 4 digit year.
        day, month_name, year = date_str.split()
        month = month_by_name(month_name, lang='fr')
        if not month:
            return None
        return '%s%02d%02d' % (year, month, int(day))

    def _real_extract(self, url):
        """Download the programme page and build the info dict for its audio.

        The media URL is required (the search raises if it is missing);
        the upload date is optional and is None when it cannot be found
        or parsed.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The media URL is carried by the data-url attribute of the first
        # <button> following the "page-diffusion" <div>.
        video_url = self._search_regex(
            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video url', group='url')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)

        # Broadcast date as displayed on the page, e.g. "7 septembre 2016".
        upload_date_str = self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
            webpage, 'upload date', fatal=False)
        upload_date = (
            self._format_upload_date(upload_date_str)
            if upload_date_str else None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': [{
                'url': video_url,
                # Audio-only stream.
                'vcodec': 'none',
            }],
        }