]>
Commit | Line | Data |
---|---|---|
9d63137e AG |
1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | clean_html, | |
4 | dict_get, | |
5 | try_get, | |
6 | unified_strdate, | |
7 | ) | |
8 | ||
9 | ||
class CanalAlphaIE(InfoExtractor):
    """Extractor for videos published under ``canalalpha.ch/play/``.

    The numeric component of the URL path is used as the video id.  All
    metadata is read from the JSON state object that the page assigns to
    ``window.__SERVER_STATE__``.
    """

    _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'

    _TESTS = [{
        'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021',
        'info_dict': {
            'id': '24520',
            'ext': 'mp4',
            'title': 'Jeudi 28 octobre 2021',
            'description': 'md5:d30c6c3e53f8ad40d405379601973b30',
            'thumbnail': 'https://static.canalalpha.ch/poster/journal/journal_20211028.jpg',
            'upload_date': '20211028',
            'duration': 1125,
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique',
        'info_dict': {
            'id': '24512',
            'ext': 'mp4',
            'title': 'La Poste fait de Neuchâtel un pôle cryptographique',
            'description': 'md5:4ba63ae78a0974d1a53d6703b6e1dedf',
            'thumbnail': 'https://static.canalalpha.ch/poster/news/news_39712.jpg',
            'upload_date': '20211028',
            'duration': 138,
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable',
        'info_dict': {
            'id': '24484',
            'ext': 'mp4',
            'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
            'description': 'md5:3de3f151180684621e85be7c10e4e613',
            'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
            'upload_date': '20211026',
            'duration': 360,
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage',
        'info_dict': {
            'id': '23516',
            'ext': 'mp4',
            'title': 'Redonner de l\'éclat grâce au polissage',
            'description': 'md5:0d8fbcda1a5a4d6f6daa3165402177e1',
            'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_9990.png',
            'upload_date': '20210726',
            'duration': 360,
        },
        'params': {'skip_download': True}
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, decodes the embedded ``window.__SERVER_STATE__``
        JSON and reads the entry at ``['1']['data']['data']``.  Formats are
        collected from the direct ``video.mp4`` entries plus the optional
        ``video.manifests.hls`` / ``video.manifests.dash`` manifests.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``upload_date``, ``duration``, ``formats`` and
        ``subtitles``.
        """
        # Named ``video_id`` rather than ``id`` to avoid shadowing the builtin.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data_json = self._parse_json(self._search_regex(
            r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
            webpage, 'data_json'), video_id)['1']['data']['data']
        manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
        subtitles = {}

        # Direct mp4 renditions; entries without a '$url' are skipped.
        formats = [{
            'url': video['$url'],
            'ext': 'mp4',
            'width': try_get(video, lambda x: x['res']['width'], expected_type=int),
            'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
        } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]

        # The manifest entries come straight out of the decoded JSON, so they
        # cannot be an already-parsed manifest document (the MPD parser needs
        # an XML element).  Use the ``_extract_*`` helpers, which download the
        # manifest before parsing it.
        # NOTE(review): this assumes the entries are manifest URLs - confirm
        # against a live page.  ``fatal=False`` keeps the mp4 formats usable
        # when a manifest cannot be fetched.
        if manifests.get('hls'):
            m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                manifests['hls'], video_id, m3u8_id='hls', fatal=False)
            formats.extend(m3u8_frmts)
            subtitles = self._merge_subtitles(subtitles, m3u8_subs)
        if manifests.get('dash'):
            dash_frmts, dash_subs = self._extract_mpd_formats_and_subtitles(
                manifests['dash'], video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_frmts)
            subtitles = self._merge_subtitles(subtitles, dash_subs)
        self._sort_formats(formats)

        return {
            'id': video_id,
            # try_get yields None instead of raising AttributeError when the
            # title is missing or is not a string.
            'title': try_get(data_json, lambda x: x['title'].strip(), expected_type=str),
            'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
            'thumbnail': data_json.get('poster'),
            'upload_date': unified_strdate(dict_get(data_json, ('webPublishAt', 'featuredAt', 'diffusionDate'))),
            'duration': try_get(data_json, lambda x: x['video']['duration'], expected_type=int),
            'formats': formats,
            'subtitles': subtitles,
        }