]>
Commit | Line | Data |
---|---|---|
50e93e03 | 1 | from .common import InfoExtractor |
2 | from ..compat import compat_str | |
3 | from ..utils import ( | |
4 | int_or_none, | |
5 | str_or_none, | |
6 | try_get, | |
7 | unified_timestamp, | |
8 | update_url_query, | |
9 | urljoin, | |
10 | ) | |
11 | ||
50e93e03 | 12 | |
class CPACIE(InfoExtractor):
    """Extractor for a single CPAC episode page.

    Handles both ``/episode?id=<uuid>`` and ``/l-episode?id=<uuid>`` URLs;
    the ``l-`` variant selects the French metadata fields.
    """

    IE_NAME = 'cpac'
    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
    _TEST = {
        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
        'info_dict': {
            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
            'ext': 'mp4',
            'upload_date': '20220215',
            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
            'timestamp': 1644901200,
        },
        'params': {
            'format': 'bestvideo',
            'hls_prefer_native': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the episode's content model and build the info dict.

        The metadata keys are suffixed with the URL language (``en``/``fr``).
        A response without ``page.details`` or without ``videoUrl`` yields an
        info dict with empty ``formats`` instead of raising ``KeyError`` /
        ``UnboundLocalError``, so the missing-formats condition is reported by
        the caller rather than as an internal crash.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if '/l-episode?' in url else 'en'

        content = self._download_json(
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
            video_id)

        # Resolve the details mapping once, tolerating an unexpected response
        # shape; every field below is then an optional lookup.
        details = try_get(content, lambda x: x['page']['details'], dict) or {}

        def localized(template):
            # e.g. 'title_%s_t' -> details['title_en_t'] (None when absent)
            return details.get(template % (url_lang, ))

        video_url = try_get(details, lambda x: x['videoUrl'], compat_str)
        formats = []
        if video_url:
            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
            for fmt in formats:
                # prefer language to match URL
                fmt_lang = fmt.get('language')
                if fmt_lang == url_lang:
                    fmt['language_preference'] = 10
                elif not fmt_lang:
                    fmt['language_preference'] = -1
                else:
                    fmt['language_preference'] = -10

        category = str_or_none(localized('category_%s_t'))
        video_type = details.get('type')

        return {
            'id': video_id,
            'formats': formats,
            'title': str_or_none(localized('title_%s_t')),
            'description': str_or_none(localized('description_%s_t')),
            'timestamp': unified_timestamp(details.get('liveDateTime')),
            # 'categories' (a list) is the documented info-dict field name
            'categories': [category] if category else None,
            'thumbnail': urljoin(url, str_or_none(localized('image_%s_s'))),
            # None (unknown) when the API does not report a type
            'is_live': (video_type == 'live') if video_type is not None else None,
        }
72 | ||
73 | ||
class CPACPlaylistIE(InfoExtractor):
    """Extractor for CPAC program and search listings.

    Matches ``/program`` and ``/search`` URLs and their French counterparts
    ``/emission`` and ``/rechercher``. The playlist id is the matched query
    parameter itself (``id=...``, ``programId=...`` or ``key=...``).
    """

    IE_NAME = 'cpac:playlist'
    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

    _TESTS = [{
        'url': 'https://www.cpac.ca/program?id=6',
        'info_dict': {
            'id': 'id=6',
            'title': 'Headline Politics',
            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
        'info_dict': {
            'id': 'key=hudson',
            'title': 'hudson',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.cpac.ca/search?programId=50',
        'info_dict': {
            'id': 'programId=50',
            'title': '50',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.cpac.ca/emission?id=6',
        'only_matching': True,
    }, {
        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the episode URLs of every listing page into a playlist.

        The first page is downloaded fatally and supplies the page count and
        the playlist title/description. Continuation pages are fetched with
        ``fatal=False``; a page that fails to download contributes no entries.
        """
        video_id = self._match_id(url)

        # _VALID_URL is case-insensitive, so the path probes must be as well;
        # otherwise e.g. '/Emission?id=6' would be handled as an English search.
        url_lower = url.lower()
        url_lang = 'fr' if any(x in url_lower for x in ('/emission?', '/rechercher?')) else 'en'
        if any(x in url_lower for x in ('/program?', '/emission?')):
            pl_type, list_type = 'program', 'itemList'
        else:
            pl_type, list_type = 'search', 'searchResult'

        api_url = (
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
            % (pl_type, video_id, ))
        first_page = self._download_json(api_url, video_id)
        total_pages = int_or_none(
            try_get(first_page, lambda x: x['page'][list_type]['totalPages']), default=1)

        entries = []
        content = first_page
        for page in range(1, total_pages + 1):
            if page > 1:
                api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
                content = self._download_json(
                    api_url, video_id,
                    note='Downloading continuation - %d' % (page, ),
                    fatal=False)

            # try_get also absorbs a failed (non-dict) continuation download
            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
                episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
                if episode_url:
                    entries.append(episode_url)

        # Take the metadata from the first page: the last continuation page may
        # have failed to download, which previously lost title and description.
        program = try_get(first_page, lambda x: x['page']['program'], dict) or {}

        return self.playlist_result(
            (self.url_result(entry) for entry in entries),
            playlist_id=video_id,
            # fall back to the value part of the id, e.g. 'key=hudson' -> 'hudson'
            playlist_title=program.get('title_%s_t' % (url_lang, )) or video_id.split('=')[-1],
            playlist_description=program.get('description_%s_t' % (url_lang, )),
        )