]>
Commit | Line | Data |
---|---|---|
50e93e03 | 1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..compat import compat_str | |
6 | from ..utils import ( | |
7 | int_or_none, | |
8 | str_or_none, | |
9 | try_get, | |
10 | unified_timestamp, | |
11 | update_url_query, | |
12 | urljoin, | |
13 | ) | |
14 | ||
# compat_range
# Probe for Python 2's `xrange`; where the name does not exist the lookup
# raises NameError and the builtin `range` is left untouched.
try:
    _have_xrange = callable(xrange)
except (NameError, TypeError):
    _have_xrange = False

if _have_xrange:
    # Shadow `range` for this module with the `xrange` callable.
    range = xrange
21 | ||
22 | ||
class CPACIE(InfoExtractor):
    """Extractor for a single cpac.ca episode page.

    Matches ``/episode?id=<uuid>`` and ``/l-episode?id=<uuid>``; the latter
    form selects the ``fr`` variants of the localized metadata fields.
    """
    IE_NAME = 'cpac'
    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
    _TEST = {
        # older URL form, not matched by _VALID_URL:
        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
        'info_dict': {
            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
            'ext': 'mp4',
            'upload_date': '20220215',
            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
            'timestamp': 1644901200,
        },
        'params': {
            'format': 'bestvideo',
            'hls_prefer_native': True,
        },
    }

    def _real_extract(self, url):
        """Download the episode's content model and build its info dict.

        The title is read with a hard lookup, so a response without the
        localized title field raises KeyError; all other metadata is optional
        and degrades to None when absent.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if '/l-episode?' in url else 'en'

        content = self._download_json(
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
            video_id)
        # Bind the details mapping unconditionally so nothing below depends
        # on which branch ran; a missing or malformed mapping becomes {}.
        details = try_get(content, lambda x: x['page']['details'], dict) or {}
        video_url = try_get(details, lambda x: x['videoUrl'], compat_str)

        formats = []
        if video_url:
            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
            for fmt in formats:
                # prefer language to match URL
                fmt_lang = fmt.get('language')
                if fmt_lang == url_lang:
                    fmt['language_preference'] = 10
                elif not fmt_lang:
                    fmt['language_preference'] = -1
                else:
                    fmt['language_preference'] = -10

        # NOTE(review): presumably this raises when `formats` is empty, which
        # is what reports an episode without a video URL - confirm against
        # InfoExtractor._sort_formats.
        self._sort_formats(formats)

        # Mandatory field: keep the hard lookup (after the formats check).
        title = str_or_none(details['title_%s_t' % (url_lang, )])
        # Optional field: tolerate its absence instead of raising KeyError.
        category = str_or_none(details.get('category_%s_t' % (url_lang, )))

        # Tri-state: None when the API gives no type, else whether it is 'live'.
        v_type = details.get('type')
        is_live = (v_type == 'live') if v_type is not None else None

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': str_or_none(details.get('description_%s_t' % (url_lang, ))),
            'timestamp': unified_timestamp(details.get('liveDateTime')),
            # 'categories' (a list) is the info-dict field name
            'categories': [category] if category else None,
            'thumbnail': urljoin(url, str_or_none(details.get('image_%s_s' % (url_lang, )))),
            'is_live': is_live,
        }
84 | ||
85 | ||
class CPACPlaylistIE(InfoExtractor):
    """Extractor for cpac.ca program and search listings.

    Matches ``/program``, ``/search`` and their French counterparts
    ``/emission`` and ``/rechercher``. The playlist id is the matched query
    fragment itself (``id=...``, ``programId=...`` or ``key=...``).
    """
    IE_NAME = 'cpac:playlist'
    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

    _TESTS = [{
        'url': 'https://www.cpac.ca/program?id=6',
        'info_dict': {
            'id': 'id=6',
            'title': 'Headline Politics',
            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
        'info_dict': {
            'id': 'key=hudson',
            'title': 'hudson',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.cpac.ca/search?programId=50',
        'info_dict': {
            'id': 'programId=50',
            'title': '50',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.cpac.ca/emission?id=6',
        'only_matching': True,
    }, {
        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the episode URLs of every listing page into a playlist.

        The first page is downloaded fatally; continuation pages are fetched
        with ``fatal=False``, so a failed continuation only drops that page's
        entries.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
        pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
        api_url = (
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
            % (pl_type, video_id, ))

        first_page = self._download_json(api_url, video_id)
        total_pages = int_or_none(try_get(first_page, lambda x: x['page'][list_type]['totalPages']), default=1)

        entries = []
        content = first_page
        for page in range(1, total_pages + 1):
            if page > 1:
                api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
                content = self._download_json(
                    api_url, video_id,
                    note='Downloading continuation - %d' % (page, ),
                    fatal=False)

            # try_get also absorbs a failed (non-dict) continuation result
            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
                episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
                if episode_url:
                    entries.append(episode_url)

        # Take playlist metadata from the first page, which is known to have
        # downloaded, so a failed last continuation cannot discard it.
        playlist_title = try_get(first_page, lambda x: x['page']['program']['title_%s_t' % (url_lang, )])
        playlist_description = try_get(first_page, lambda x: x['page']['program']['description_%s_t' % (url_lang, )])

        return self.playlist_result(
            (self.url_result(entry) for entry in entries),
            playlist_id=video_id,
            # fall back to the value part of the id, e.g. 'hudson' for 'key=hudson'
            playlist_title=playlist_title or video_id.split('=')[-1],
            playlist_description=playlist_description,
        )