]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/cpac.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / cpac.py
CommitLineData
50e93e03 1from .common import InfoExtractor
50e93e03 2from ..utils import (
3 int_or_none,
4 str_or_none,
5 try_get,
6 unified_timestamp,
7 update_url_query,
8 urljoin,
9)
10
50e93e03 11
class CPACIE(InfoExtractor):
    """Extractor for a single CPAC episode page.

    Handles ``cpac.ca/episode?id=<uuid>`` and the French
    ``cpac.ca/l-episode?id=<uuid>`` form (see ``_VALID_URL``).
    """

    IE_NAME = 'cpac'
    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
    _TEST = {
        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
        'info_dict': {
            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
            'ext': 'mp4',
            'upload_date': '20220215',
            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
            'timestamp': 1644901200,
        },
        'params': {
            'format': 'bestvideo',
            'hls_prefer_native': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        The episode's ``page.details`` object from the content-model API is
        the single source for every metadata field. The title is looked up
        strictly (a missing title key raises ``KeyError``); all other fields
        are optional. ``formats`` stays empty when the response carries no
        string ``videoUrl``.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if '/l-episode?' in url else 'en'

        content = self._download_json(
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
            video_id)

        # Resolve the details object once, up front, so that the metadata
        # below is available whether or not a stream URL is present.
        details = content['page']['details']
        title = str_or_none(details[f'title_{url_lang}_t'])

        video_url = try_get(details, lambda x: x['videoUrl'], str)
        formats = []
        if video_url:
            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
            for fmt in formats:
                # prefer language to match URL
                fmt_lang = fmt.get('language')
                if fmt_lang == url_lang:
                    fmt['language_preference'] = 10
                elif not fmt_lang:
                    fmt['language_preference'] = -1
                else:
                    fmt['language_preference'] = -10

        # Category is optional metadata, so a missing key must not abort extraction.
        category = str_or_none(details.get(f'category_{url_lang}_t'))

        # Tri-state: None when the API gives no type, otherwise whether it is 'live'.
        episode_type = details.get('type')
        is_live = (episode_type == 'live') if episode_type is not None else None

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': str_or_none(details.get(f'description_{url_lang}_t')),
            'timestamp': unified_timestamp(details.get('liveDateTime')),
            'categories': [category] if category else None,
            'thumbnail': urljoin(url, str_or_none(details.get(f'image_{url_lang}_s'))),
            'is_live': is_live,
        }
71
72
class CPACPlaylistIE(InfoExtractor):
    """Extractor for CPAC program and search listings.

    Handles ``/program`` and ``/search`` pages and their French
    counterparts ``/emission`` and ``/rechercher`` (see ``_VALID_URL``).
    The playlist id is the matched query fragment itself, e.g. ``id=6``.
    """

    IE_NAME = 'cpac:playlist'
    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

    _TESTS = [{
        'url': 'https://www.cpac.ca/program?id=6',
        'info_dict': {
            'id': 'id=6',
            'title': 'Headline Politics',
            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
        'info_dict': {
            'id': 'key=hudson',
            'title': 'hudson',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.cpac.ca/search?programId=50',
        'info_dict': {
            'id': 'programId=50',
            'title': '50',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.cpac.ca/emission?id=6',
        'only_matching': True,
    }, {
        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the episode URLs of a program or search listing.

        Downloads the first listing page, reads ``totalPages`` from it
        (defaulting to 1), then fetches each further page non-fatally.
        Playlist title and description are taken from the first page; the
        title falls back to the value part of the playlist id.
        """
        video_id = self._match_id(url)

        # _VALID_URL is case-insensitive, so classify the URL on a
        # lowercased copy; otherwise e.g. '/Emission?' would be treated as
        # an English search page.
        lowered_url = url.lower()
        url_lang = 'fr' if any(x in lowered_url for x in ('/emission?', '/rechercher?')) else 'en'
        if any(x in lowered_url for x in ('/program?', '/emission?')):
            pl_type, list_type = 'program', 'itemList'
        else:
            pl_type, list_type = 'search', 'searchResult'

        api_url = (
            f'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/{pl_type}/index.xml&crafterSite=cpacca&{video_id}')
        first_page = self._download_json(api_url, video_id)
        total_pages = int_or_none(try_get(first_page, lambda x: x['page'][list_type]['totalPages']), default=1)

        entries = []
        content = first_page
        for page in range(1, total_pages + 1):
            if page > 1:
                # Continuation pages are best-effort: a failed download
                # contributes no entries instead of aborting the playlist.
                content = self._download_json(
                    update_url_query(api_url, {'page': page}), video_id,
                    note=f'Downloading continuation - {page}',
                    fatal=False)

            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
                episode_url = urljoin(url, try_get(item, lambda x: x[f'url_{url_lang}_s']))
                if episode_url:
                    entries.append(episode_url)

        # Read playlist metadata from the first page rather than from
        # whichever continuation page was fetched last, which may have
        # failed to download.
        return self.playlist_result(
            (self.url_result(entry) for entry in entries),
            playlist_id=video_id,
            playlist_title=try_get(first_page, lambda x: x['page']['program'][f'title_{url_lang}_t']) or video_id.split('=')[-1],
            playlist_description=try_get(first_page, lambda x: x['page']['program'][f'description_{url_lang}_t']),
        )