]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nfb.py
Add new options `--impersonate` and `--list-impersonate-targets`
[yt-dlp.git] / yt_dlp / extractor / nfb.py
CommitLineData
f1657a98 1from .common import InfoExtractor
4b8b0dde 2from ..utils import (
3 int_or_none,
4 join_nonempty,
5 merge_dicts,
6 parse_count,
7 url_or_none,
8 urljoin,
9)
10from ..utils.traversal import traverse_obj
f1657a98 11
12
class NFBBaseIE(InfoExtractor):
    """Shared helpers for the nfb.ca / onf.ca extractors defined in this module."""

    # Scheme + host prefix reused by the subclasses' _VALID_URL patterns.
    # Captures the named group `site` as either "nfb" or "onf".
    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
    _GEO_COUNTRIES = ['CA']

    def _extract_ep_data(self, webpage, video_id, fatal=False):
        """Return the JSON array assigned to `const episodesData` in *webpage*.

        An empty list is returned when nothing is found and *fatal* is falsy;
        *fatal* is forwarded unchanged to `_search_json`.
        """
        return self._search_json(
            r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
            contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []

    def _extract_ep_info(self, data, video_id, slug=None):
        """Build an info dict for the episode in *data* whose embed URL contains *video_id*.

        *data* is the list of episode objects (as returned by `_extract_ep_data`).
        The episode number is parsed from *slug* when given, otherwise from *video_id*.
        Only 'id', 'title' and 'episode_number' are guaranteed to be present as keys.
        """
        # get_all=False yields only the first matching entry, or None when no
        # entry matches (or data is empty). Fall back to an empty dict so the
        # `**info` unpacking and `from_dict=info` below cannot raise TypeError.
        info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
            'description': ('description', {str}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'uploader': ('data_layer', 'episodeMaker', {str}),
            'release_year': ('data_layer', 'episodeYear', {int_or_none}),
            'episode': ('data_layer', 'episodeTitle', {str}),
            'season': ('data_layer', 'seasonTitle', {str}),
            'season_number': ('data_layer', 'seasonTitle', {parse_count}),
            'series': ('data_layer', 'seriesTitle', {str}),
        }), get_all=False) or {}

        return {
            **info,
            'id': video_id,
            'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
            # Matches e.g. "/episode9", "-episode-9-" or "-e9-" inside the slug / id
            'episode_number': int_or_none(self._search_regex(
                r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
        }
41
42
class NFBIE(NFBBaseIE):
    """Extractor for single films and series episodes on nfb.ca / onf.ca."""

    IE_NAME = 'nfb'
    IE_DESC = 'nfb.ca and onf.ca films and episodes'
    # First pattern: /film/<slug>; second: /series|serie/<slug>/seasonN|saisonN/episodeN
    _VALID_URL = [
        rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
        rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
    ]
    _TESTS = [{
        'note': 'NFB film',
        'url': 'https://www.nfb.ca/film/trafficopter/',
        'info_dict': {
            'id': 'trafficopter',
            'ext': 'mp4',
            'title': 'Trafficopter',
            'description': 'md5:060228455eb85cf88785c41656776bc0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Barrie Howells',
            'release_year': 1972,
            'duration': 600.0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF film',
        'url': 'https://www.onf.ca/film/mal-du-siecle/',
        'info_dict': {
            'id': 'mal-du-siecle',
            'ext': 'mp4',
            'title': 'Le mal du siècle',
            'description': 'md5:1abf774d77569ebe603419f2d344102b',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Catherine Lepage',
            'release_year': 2019,
            'duration': 300.0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB episode with English title',
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
        'info_dict': {
            'id': 'true-north-episode9-true-north-finale-making-it',
            'ext': 'mp4',
            'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
            'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
            'series': 'True North: Inside the Rise of Toronto Basketball',
            'release_year': 2018,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Finale: Making It',
            'episode_number': 9,
            'uploader': 'Ryan Sidhoo',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF episode with French title',
        'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
        'info_dict': {
            'id': 'direction-nord-episode-9',
            'ext': 'mp4',
            'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir',
            'description': 'md5:349a57419b71432b97bf6083d92b029d',
            'series': 'Direction nord – La montée du basketball à Toronto',
            'release_year': 2018,
            'season': 'Saison 1',
            'season_number': 1,
            'episode': 'Finale : Réussir',
            'episode_number': 9,
            'uploader': 'Ryan Sidhoo',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB episode with French title (needs geo-bypass)',
        'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
        'info_dict': {
            'id': 'etoile-du-nord-episode-1-lobservation',
            'ext': 'mp4',
            'title': 'Étoile du Nord - L\'observation',
            'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
            'series': 'Étoile du Nord',
            'release_year': 2023,
            'season': 'Saison 1',
            'season_number': 1,
            'episode': 'L\'observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF episode with English title (needs geo-bypass)',
        'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
        'info_dict': {
            'id': 'north-star-episode-1-observation',
            'ext': 'mp4',
            'title': 'North Star - Observation',
            'description': 'md5:c727f370839d8a817392b9e3f23655c7',
            'series': 'North Star',
            'release_year': 2023,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
        'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
        'info_dict': {
            'id': 'north-star-episode-1-observation',
            'ext': 'mp4',
            'title': 'North Star - Observation',
            'description': 'md5:c727f370839d8a817392b9e3f23655c7',
            'series': 'North Star',
            'release_year': 2023,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
        'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
        'info_dict': {
            'id': 'etoile-du-nord-episode-1-lobservation',
            'ext': 'mp4',
            'title': 'Étoile du Nord - L\'observation',
            'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
            'series': 'Étoile du Nord',
            'release_year': 2023,
            'season': 'Saison 1',
            'season_number': 1,
            'episode': 'L\'observation',
            'episode_number': 1,
            'uploader': 'Patrick Bossé',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'Season 2 episode w/o episode num in id, extract from json ld',
        'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
        'info_dict': {
            'id': 'liste-des-choses-qui-existent-saison-2-ours',
            'ext': 'mp4',
            'title': 'La liste des choses qui existent - L\'ours en peluche',
            'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
            'series': 'La liste des choses qui existent',
            'release_year': 2022,
            'season': 'Saison 2',
            'season_number': 2,
            'episode': 'L\'ours en peluche',
            'episode_number': 12,
            'uploader': 'Francis Papillon',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'NFB film /embed/player/ page',
        'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
        'info_dict': {
            'id': 'afterlife',
            'ext': 'mp4',
            'title': 'Afterlife',
            'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
            'release_year': 1978,
            'duration': 420.0,
            'uploader': 'Ishu Patel',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for a film or episode page.

        Downloads the canonical page, follows its player iframe, collects the
        HLS formats (plus the optional described-video variant) and merges page
        metadata with any JSON-LD found on the page.
        Raises a geo-restriction error when the player page reports MESSAGE_GEOBLOCKED.
        """
        site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
        # Need to construct the URL since we match /embed/player/ URLs as well
        webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
        # type_ can change from film to serie(s) after redirect; new slug may have episode number
        type_, slug = self._match_valid_url(urlh.url).group('type', 'id')

        embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
            r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
        video_id = self._match_id(embed_url)  # embed url has unique slug
        player = self._download_webpage(embed_url, video_id, 'Downloading player page')
        if 'MESSAGE_GEOBLOCKED' in player:
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
            video_id, 'mp4', m3u8_id='hls')

        # Optional "described video" rendition; deprioritised and non-fatal
        if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
            for fmt in fmts:
                fmt['format_note'] = 'described video'
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        if type_ == 'film':
            # Film pages carry their metadata in the HTML markup / player script
            info = {
                'id': video_id,
                'title': self._html_search_regex(
                    r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
                    webpage, 'title', default=None),
                'description': self._html_search_regex(
                    r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
                    webpage, 'description', default=None),
                'thumbnail': self._html_search_regex(
                    r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
                'uploader': self._html_search_regex(
                    r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
                'release_year': int_or_none(self._html_search_regex(
                    r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
                    webpage, 'release_year', default=None)),
            }
        else:
            # The slug belongs to _extract_ep_info (episode number lookup), not to
            # _extract_ep_data, whose third positional parameter is `fatal`.
            # fatal=True keeps the previous effective behaviour: the non-empty slug
            # that used to land in `fatal` was always truthy.
            episodes = self._extract_ep_data(webpage, video_id, fatal=True)
            info = self._extract_ep_info(episodes, video_id, slug)

        # Earlier dicts win in merge_dicts, so JSON-LD only fills in missing fields
        return merge_dicts({
            'formats': formats,
            'subtitles': subtitles,
        }, info, self._search_json_ld(webpage, video_id, default={}))
267
268
class NFBSeriesIE(NFBBaseIE):
    """Playlist extractor for whole series on nfb.ca / onf.ca."""

    IE_NAME = 'nfb:series'
    IE_DESC = 'nfb.ca and onf.ca series'
    # Matches only the series root (no season/episode path after the slug)
    _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
        'playlist_mincount': 9,
        'info_dict': {
            'id': 'true-north-inside-the-rise-of-toronto-basketball',
        },
    }, {
        'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
        'playlist_mincount': 26,
        'info_dict': {
            'id': 'la-liste-des-choses-qui-existent-serie',
        },
    }]

    def _entries(self, episodes):
        """Yield a url result for every episode whose embed URL NFBIE can handle."""
        supported = traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url']))
        for ep in supported:
            match = NFBIE._match_valid_url(ep['embed_url'])
            # match[0] is the part of the embed URL matched by NFBIE's pattern
            yield self.url_result(
                match[0], NFBIE, **self._extract_ep_info([ep], match.group('id')))

    def _real_extract(self, url):
        """Return a playlist of all episodes listed in the series' episode data."""
        url_match = self._match_valid_url(url)
        site, type_, series_id = url_match.group('site', 'type', 'id')

        # French "serie" URLs use "saison" in the path, English ones use "season"
        if type_ == 'serie':
            season_path = 'saison'
        else:
            season_path = 'season'

        # The episode data is read from the first episode's page
        first_episode_url = f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1'
        webpage = self._download_webpage(first_episode_url, series_id)
        episodes = self._extract_ep_data(webpage, series_id, fatal=True)

        return self.playlist_result(self._entries(episodes), series_id)