]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ( | |
3 | clean_html, | |
4 | dict_get, | |
5 | try_get, | |
6 | unified_strdate, | |
7 | ) | |
8 | ||
9 | ||
class CanalAlphaIE(InfoExtractor):
    """Extractor for video pages under ``canalalpha.ch/play/``.

    The page embeds its state as a JSON object assigned to
    ``window.__SERVER_STATE__``; metadata, progressive MP4 URLs and the
    HLS/DASH manifest URLs are all read from that object.
    """

    _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'

    _TESTS = [{
        'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021',
        'info_dict': {
            'id': '24520',
            'ext': 'mp4',
            'title': 'Jeudi 28 octobre 2021',
            'description': 'md5:d30c6c3e53f8ad40d405379601973b30',
            'thumbnail': 'https://static.canalalpha.ch/poster/journal/journal_20211028.jpg',
            'upload_date': '20211028',
            'duration': 1125,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique',
        'info_dict': {
            'id': '24512',
            'ext': 'mp4',
            'title': 'La Poste fait de Neuchâtel un pôle cryptographique',
            'description': 'md5:4ba63ae78a0974d1a53d6703b6e1dedf',
            'thumbnail': 'https://static.canalalpha.ch/poster/news/news_39712.jpg',
            'upload_date': '20211028',
            'duration': 138,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable',
        'info_dict': {
            'id': '24484',
            'ext': 'mp4',
            'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
            'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
            'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
            'upload_date': '20211026',
            'duration': 360,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage',
        'info_dict': {
            'id': '23516',
            'ext': 'mp4',
            'title': 'Redonner de l\'éclat grâce au polissage',
            'description': 'md5:0d8fbcda1a5a4d6f6daa3165402177e1',
            'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_9990.png',
            'upload_date': '20210726',
            'duration': 360,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
        'info_dict': {
            'id': '33500',
            'ext': 'mp4',
            'title': 'Encore des mesures d\'économie dans le Jura',
            'description': 'md5:938b5b556592f2d1b9ab150268082a80',
            'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
            'upload_date': '20240411',
            'duration': 105,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single Canal Alpha video page.

        Downloads the page, parses the embedded ``window.__SERVER_STATE__``
        JSON and collects formats from the progressive MP4 list and from the
        HLS and DASH manifests when they are present.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The regex captures the whole object literal up to the closing `};`,
        # stepping over quoted strings so a `};` inside a string value does
        # not end the match early.
        data_json = self._parse_json(self._search_regex(
            r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
            webpage, 'data_json'), video_id)['1']['data']['data']

        manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
        subtitles = {}

        # Progressive MP4 renditions; entries without a `$url` are skipped.
        formats = [{
            'url': video['$url'],
            'ext': 'mp4',
            'width': try_get(video, lambda x: x['res']['width'], expected_type=int),
            'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
        } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]

        # Manifest extraction is non-fatal so that one broken manifest does
        # not discard the formats gathered from the other sources.
        if manifests.get('hls'):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                manifests['hls'], video_id, m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
        if manifests.get('dash'):
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                manifests['dash'], video_id, mpd_id='dash', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': video_id,
            # A missing or null title previously raised a bare AttributeError
            # from `None.strip()`; try_get turns that into None instead.
            'title': try_get(data_json, lambda x: x['title'].strip()),
            'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
            'thumbnail': data_json.get('poster'),
            'upload_date': unified_strdate(dict_get(data_json, ('webPublishAt', 'featuredAt', 'diffusionDate'))),
            'duration': try_get(data_json, lambda x: x['video']['duration'], expected_type=int),
            'formats': formats,
            'subtitles': subtitles,
        }