]>
Commit | Line | Data |
---|---|---|
f1657a98 | 1 | from .common import InfoExtractor |
4b8b0dde | 2 | from ..utils import ( |
3 | int_or_none, | |
4 | join_nonempty, | |
5 | merge_dicts, | |
6 | parse_count, | |
7 | url_or_none, | |
8 | urljoin, | |
9 | ) | |
10 | from ..utils.traversal import traverse_obj | |
f1657a98 | 11 | |
12 | ||
class NFBBaseIE(InfoExtractor):
    """Helpers shared by the nfb.ca / onf.ca extractors defined in this module."""

    # URL prefix reused by subclasses; captures which of the two sites was requested
    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
    _GEO_COUNTRIES = ['CA']

    def _extract_ep_data(self, webpage, video_id, fatal=False):
        """Parse the `const episodesData = [...]` array literal out of *webpage*.

        `fatal` is forwarded to `_search_json`; a falsy result is normalised
        to an empty list so callers can always iterate the return value.
        """
        episodes = self._search_json(
            r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
            contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal)
        return episodes or []

    def _extract_ep_info(self, data, video_id, slug=None):
        """Build the metadata dict for the episode in *data* matching *video_id*.

        The episode is selected by checking that `video_id` occurs in the
        entry's `embed_url`. The episode number is parsed from `slug` when one
        is given, otherwise from `video_id`.
        """
        def _is_requested_episode(_, episode):
            return video_id in episode['embed_url']

        field_map = {
            'description': ('description', {str}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'uploader': ('data_layer', 'episodeMaker', {str}),
            'release_year': ('data_layer', 'episodeYear', {int_or_none}),
            'episode': ('data_layer', 'episodeTitle', {str}),
            'season': ('data_layer', 'seasonTitle', {str}),
            'season_number': ('data_layer', 'seasonTitle', {parse_count}),
            'series': ('data_layer', 'seriesTitle', {str}),
        }
        # get_all=False: only the first matching episode entry is used
        info = traverse_obj(data, (_is_requested_episode, field_map), get_all=False)

        episode_number = int_or_none(self._search_regex(
            r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None))

        return {
            **info,
            'id': video_id,
            'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
            'episode_number': episode_number,
        }
41 | ||
42 | ||
class NFBIE(NFBBaseIE):
    """Extractor for single films and single series episodes on nfb.ca / onf.ca."""

    IE_NAME = 'nfb'
    IE_DESC = 'nfb.ca and onf.ca films and episodes'
    _VALID_URL = [
        rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
        rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
    ]
    _TESTS = [{
        'note': 'NFB film',
        'url': 'https://www.nfb.ca/film/trafficopter/',
        'info_dict': {
            'id': 'trafficopter',
            'ext': 'mp4',
            'title': 'Trafficopter',
            'description': 'md5:060228455eb85cf88785c41656776bc0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Barrie Howells',
            'release_year': 1972,
            'duration': 600.0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF film',
        'url': 'https://www.onf.ca/film/mal-du-siecle/',
        'info_dict': {
            'id': 'mal-du-siecle',
            'ext': 'mp4',
            'title': 'Le mal du siècle',
            'description': 'md5:1abf774d77569ebe603419f2d344102b',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Catherine Lepage',
            'release_year': 2019,
            'duration': 300.0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB episode with English title',
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
        'info_dict': {
            'id': 'true-north-episode9-true-north-finale-making-it',
            'ext': 'mp4',
            'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
            'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
            'series': 'True North: Inside the Rise of Toronto Basketball',
            'release_year': 2018,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Finale: Making It',
            'episode_number': 9,
            'uploader': 'Ryan Sidhoo',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF episode with French title',
        'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
        'info_dict': {
            'id': 'direction-nord-episode-9',
            'ext': 'mp4',
            'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir',
            'description': 'md5:349a57419b71432b97bf6083d92b029d',
            'series': 'Direction nord – La montée du basketball à Toronto',
            'release_year': 2018,
            'season': 'Saison 1',
            'season_number': 1,
            'episode': 'Finale : Réussir',
            'episode_number': 9,
            'uploader': 'Ryan Sidhoo',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB episode with French title (needs geo-bypass)',
        'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
        'info_dict': {
            'id': 'etoile-du-nord-episode-1-lobservation',
            'ext': 'mp4',
            'title': 'Étoile du Nord - L\'observation',
            'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
            'series': 'Étoile du Nord',
            'release_year': 2023,
            'season': 'Saison 1',
            'season_number': 1,
            'episode': 'L\'observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF episode with English title (needs geo-bypass)',
        'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
        'info_dict': {
            'id': 'north-star-episode-1-observation',
            'ext': 'mp4',
            'title': 'North Star - Observation',
            'description': 'md5:c727f370839d8a817392b9e3f23655c7',
            'series': 'North Star',
            'release_year': 2023,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
        'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
        'info_dict': {
            'id': 'north-star-episode-1-observation',
            'ext': 'mp4',
            'title': 'North Star - Observation',
            'description': 'md5:c727f370839d8a817392b9e3f23655c7',
            'series': 'North Star',
            'release_year': 2023,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
        'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
        'info_dict': {
            'id': 'etoile-du-nord-episode-1-lobservation',
            'ext': 'mp4',
            'title': 'Étoile du Nord - L\'observation',
            'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
            'series': 'Étoile du Nord',
            'release_year': 2023,
            'season': 'Saison 1',
            'season_number': 1,
            'episode': 'L\'observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'Season 2 episode w/o episode num in id, extract from json ld',
        'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
        'info_dict': {
            'id': 'liste-des-choses-qui-existent-saison-2-ours',
            'ext': 'mp4',
            'title': 'La liste des choses qui existent - L\'ours en peluche',
            'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
            'series': 'La liste des choses qui existent',
            'release_year': 2022,
            'season': 'Saison 2',
            'season_number': 2,
            'episode': 'L\'ours en peluche',
            'episode_number': 12,
            'uploader': 'Francis Papillon',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB film /embed/player/ page',
        'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
        'info_dict': {
            'id': 'afterlife',
            'ext': 'mp4',
            'title': 'Afterlife',
            'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
            'release_year': 1978,
            'duration': 420.0,
            'uploader': 'Ishu Patel',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for one film or episode page.

        The canonical page is downloaded first; its player iframe gives the
        embed URL, whose slug becomes the video id. The player page supplies
        the HLS manifest(s) and poster. Metadata is scraped from the page HTML
        for films, or taken from the page's episode data for series episodes,
        and finally merged with any JSON-LD found on the page.

        Raises a geo-restriction error when the player page contains the
        `MESSAGE_GEOBLOCKED` marker.
        """
        site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
        # Need to construct the URL since we match /embed/player/ URLs as well
        webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
        # type_ can change from film to serie(s) after redirect; new slug may have episode number
        type_, slug = self._match_valid_url(urlh.url).group('type', 'id')

        embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
            r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
        video_id = self._match_id(embed_url)  # embed url has unique slug
        player = self._download_webpage(embed_url, video_id, 'Downloading player page')
        if 'MESSAGE_GEOBLOCKED' in player:
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
            video_id, 'mp4', m3u8_id='hls')

        # Optional second manifest, labelled as described video and given lower preference
        if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
            for fmt in fmts:
                fmt['format_note'] = 'described video'
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        if type_ == 'film':
            info = {
                'id': video_id,
                'title': self._html_search_regex(
                    r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
                    webpage, 'title', default=None),
                'description': self._html_search_regex(
                    r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
                    webpage, 'description', default=None),
                'thumbnail': self._html_search_regex(
                    r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
                'uploader': self._html_search_regex(
                    r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
                'release_year': int_or_none(self._html_search_regex(
                    r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
                    webpage, 'release_year', default=None)),
            }
        else:
            # The slug used to be passed positionally into `_extract_ep_data`,
            # where it landed in the `fatal` parameter (always truthy) and never
            # reached `_extract_ep_info`. Keep the lookup fatal, but say so
            # explicitly, and hand the slug to the method that parses the
            # episode number from it.
            episodes = self._extract_ep_data(webpage, video_id, fatal=True)
            info = self._extract_ep_info(episodes, video_id, slug)

        # Scraped/episode metadata comes before JSON-LD in the merge
        return merge_dicts({
            'formats': formats,
            'subtitles': subtitles,
        }, info, self._search_json_ld(webpage, video_id, default={}))
267 | ||
268 | ||
class NFBSeriesIE(NFBBaseIE):
    """Playlist extractor for whole series pages on nfb.ca / onf.ca."""

    IE_NAME = 'nfb:series'
    IE_DESC = 'nfb.ca and onf.ca series'
    _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
        'playlist_mincount': 9,
        'info_dict': {
            'id': 'true-north-inside-the-rise-of-toronto-basketball',
        },
    }, {
        'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
        'playlist_mincount': 26,
        'info_dict': {
            'id': 'la-liste-des-choses-qui-existent-serie',
        },
    }]

    def _entries(self, episodes):
        """Yield one `NFBIE` url result per episode whose `embed_url` NFBIE can handle."""
        playable = traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url']))
        for episode in playable:
            match = NFBIE._match_valid_url(episode['embed_url'])
            # Metadata is built from this single episode entry, keyed by the id in its embed URL
            metadata = self._extract_ep_info([episode], match.group('id'))
            yield self.url_result(match[0], NFBIE, **metadata)

    def _real_extract(self, url):
        """Return a playlist of the series' episodes.

        The episode list is read from the page of season 1, episode 1; a
        missing episode list is treated as fatal.
        """
        site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')
        # The season path segment depends on which spelling of the series path was matched
        if type_ == 'serie':
            season_path = 'saison'
        else:
            season_path = 'season'

        first_episode_url = f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1'
        webpage = self._download_webpage(first_episode_url, series_id)
        episodes = self._extract_ep_data(webpage, series_id, fatal=True)

        return self.playlist_result(self._entries(episodes), series_id)