]>
Commit | Line | Data |
---|---|---|
77cc7c6e | 1 | import re |
061d1cd9 | 2 | |
298a120a | 3 | from .common import InfoExtractor |
77cc7c6e | 4 | from ..utils import ( |
81c8b9bd | 5 | ExtractorError, |
6 | int_or_none, | |
7 | join_nonempty, | |
77cc7c6e LNO |
8 | parse_duration, |
9 | traverse_obj, | |
10 | unescapeHTML, | |
11 | unified_timestamp, | |
81c8b9bd | 12 | url_or_none, |
8f0be90e | 13 | urljoin, |
77cc7c6e | 14 | ) |
298a120a AN |
15 | |
16 | ||
class NhkBaseIE(InfoExtractor):
    # Shared URL patterns and API helpers for the NHK World on-demand extractors.

    # Placeholders, in order: media prefix ('v' or 'r'), list kind ('clip' or 'esd'),
    # lookup scope ('episode' or 'program'), ID, language, optional '/all' suffix.
    _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
    _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
    _TYPE_REGEX = r'/(?P<type>video|audio)/'

    def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
        """Download the on-demand list for m_id and return its episodes.

        Returns the value of data.episodes from the JSON response, or an
        empty list when that value is falsy.
        """
        return self._download_json(
            self._API_URL_TEMPLATE % (
                'v' if is_video else 'r',
                'clip' if is_clip else 'esd',
                'episode' if is_episode else 'program',
                m_id, lang, '/all' if is_video else ''),
            m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []

    def _extract_episode_info(self, url, episode=None):
        """Build the info dict for a single episode.

        url must match NhkVodIE._VALID_URL. When episode is None the episode
        metadata is fetched from the API; otherwise the given dict is used.

        Raises ExtractorError if the API returns no episode for the URL.
        """
        fetch_episode = episode is None
        lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
        # 7-character IDs are turned into the dashed 'XXXX-XXX' form before use
        if len(episode_id) == 7:
            episode_id = episode_id[:4] + '-' + episode_id[4:]

        is_video = m_type == 'video'
        if fetch_episode:
            # IDs starting with '9999' are requested from the clip list
            episodes = self._call_api(
                episode_id, lang, is_video, True, episode_id[:4] == '9999')
            if not episodes:
                # _call_api() returns [] for an empty result; fail with a
                # readable message instead of an IndexError
                raise ExtractorError(
                    'No episode data was returned for this URL; '
                    'the content may have expired or been removed', expected=True)
            episode = episodes[0]
        title = episode.get('sub_title_clean') or episode['sub_title']

        def get_clean_field(key):
            # prefer the '_clean' variant of a field when the API provides it
            return episode.get(key + '_clean') or episode.get(key)

        series = get_clean_field('title')

        thumbnails = []
        for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
            img_path = episode.get('image' + s)
            if not img_path:
                continue
            thumbnails.append({
                'id': '%dp' % h,
                'height': h,
                'width': w,
                'url': 'https://www3.nhk.or.jp' + img_path,
            })

        info = {
            'id': episode_id + '-' + lang,
            'title': '%s - %s' % (series, title) if series and title else title,
            'description': get_clean_field('description'),
            'thumbnails': thumbnails,
            'series': series,
            'episode': title,
        }
        if is_video:
            # videos are delegated to the Piksel extractor via the VOD reference ID
            vod_id = episode['vod_id']
            info.update({
                '_type': 'url_transparent',
                'ie_key': 'Piksel',
                'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id,
                'id': vod_id,
            })
        else:
            if fetch_episode:
                audio_path = episode['audio']['audio']
                info['formats'] = self._extract_m3u8_formats(
                    'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
                    episode_id, 'm4a', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                for f in info['formats']:
                    f['language'] = lang
            else:
                # episode came from a program listing: hand the URL back to NhkVodIE
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': NhkVodIE.ie_key(),
                    'url': url,
                })
        return info
29f7c58a | 92 | |
93 | ||
class NhkVodIE(NhkBaseIE):
    # Extractor for a single NHK World on-demand video or audio page.

    # The 7-character IDs can contain alphabetic characters too: assume [a-z]
    # rather than just [a-f], e.g. the "alphabetic character in ID" test below.
    _VALID_URL = (
        NhkBaseIE._BASE_URL_REGEX + NhkBaseIE._TYPE_REGEX
        + r'(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)')
    # Content available only for a limited period of time. Visit
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
    _TESTS = [{
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2061601/',
        'info_dict': {
            'id': 'yd8322ch',
            'ext': 'mp4',
            'description': 'md5:109c8b05d67a62d0592f2b445d2cd898',
            'title': 'GRAND SUMO Highlights - [Recap] May Tournament Day 1 (Opening Day)',
            'upload_date': '20230514',
            'timestamp': 1684083791,
            'series': 'GRAND SUMO Highlights',
            'episode': '[Recap] May Tournament Day 1 (Opening Day)',
            'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1684084443/4028649.jpg?w=1920&h=1080',
        },
    }, {
        # video clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
        'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
        'info_dict': {
            'id': 'a95j5iza',
            'ext': 'mp4',
            'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
            'timestamp': 1565965194,
            'upload_date': '20190816',
            'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1567086278/3715195.jpg?w=1920&h=1080',
            'series': 'Dining with the Chef',
            'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
        },
    }, {
        # audio clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
        'info_dict': {
            'id': 'r_inventions-20201104-1-en',
            'ext': 'm4a',
            'title': "Japan's Top Inventions - Miniature Video Cameras",
            'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
        },
        'skip': '404 Not Found',
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
        'only_matching': True,
    }, {
        # video, alphabetic character in ID #29670
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
        'info_dict': {
            'id': 'qfjay6cg',
            'ext': 'mp4',
            'title': 'DESIGN TALKS plus - Fishermen’s Finery',
            'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
            'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
            'upload_date': '20210615',
            'timestamp': 1623722008,
        },
        'skip': '404 Not Found',
    }]

    def _real_extract(self, url):
        """Extract one episode; the metadata is fetched by the base-class helper."""
        episode_info = self._extract_episode_info(url)
        return episode_info
166 | ||
167 | ||
class NhkVodProgramIE(NhkBaseIE):
    # Playlist extractor for an NHK World on-demand program page.

    _VALID_URL = (
        NhkBaseIE._BASE_URL_REGEX + '/program' + NhkBaseIE._TYPE_REGEX
        + r'(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?')
    _TESTS = [{
        # video program episodes
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo',
        'info_dict': {
            'id': 'sumo',
            'title': 'GRAND SUMO Highlights',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
        },
        'playlist_mincount': 12,
    }, {
        # video program clips
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
        'only_matching': True,
    }, {
        # audio program
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist with one entry per listed episode that has a URL."""
        lang, m_type, program_id, episode_type = self._match_valid_url(url).groups()

        wants_video = m_type == 'video'
        wants_clips = episode_type == 'clip'
        listed_episodes = self._call_api(
            program_id, lang, wants_video, False, wants_clips)

        # episodes without a 'url' field are skipped
        entries = [
            self._extract_episode_info(urljoin(url, item.get('url')), item)
            for item in listed_episodes
            if item.get('url')
        ]

        # the playlist title is the series name of the first entry, if any
        program_title = entries[0].get('series') if entries else None

        return self.playlist_result(entries, program_id, program_title)
77cc7c6e LNO |
221 | |
222 | ||
class NhkForSchoolBangumiIE(InfoExtractor):
    # Extractor for NHK for School movie pages (bangumi.cgi / clip.cgi).

    _VALID_URL = r'https?://www2\.nhk\.or\.jp/school/movie/(?P<type>bangumi|clip)\.cgi\?das_id=(?P<id>[a-zA-Z0-9_-]+)'
    _TESTS = [{
        'url': 'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id=D0005150191_00000',
        'info_dict': {
            'id': 'D0005150191_00003',
            'title': 'にている かな',
            'duration': 599.999,
            'timestamp': 1396414800,

            'upload_date': '20140402',
            'ext': 'mp4',

            'chapters': 'count:12'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Scrape the player page's inline JavaScript for metadata, formats and chapters."""
        program_type, video_id = self._match_valid_url(url).groups()

        webpage = self._download_webpage(
            f'https://www2.nhk.or.jp/school/movie/{program_type}.cgi?das_id={video_id}', video_id)

        # searches all variables
        base_values = {g.group(1): g.group(2) for g in re.finditer(r'var\s+([a-zA-Z_]+)\s*=\s*"([^"]+?)";', webpage)}
        # and programObj values too
        program_values = {g.group(1): g.group(3) for g in re.finditer(r'(?:program|clip)Obj\.([a-zA-Z_]+)\s*=\s*(["\'])([^"]+?)\2;', webpage)}
        # extract all chapters
        chapter_durations = [parse_duration(g.group(1)) for g in re.finditer(r'chapterTime\.push\(\'([0-9:]+?)\'\);', webpage)]
        chapter_titles = [' '.join([g.group(1) or '', unescapeHTML(g.group(2))]).strip() for g in re.finditer(r'<div class="cpTitle"><span>(scene\s*\d+)?</span>([^<]+?)</div>', webpage)]

        # this is how player_core.js is actually doing (!)
        version = base_values.get('r_version') or program_values.get('version')
        if version:
            video_id = f'{video_id.split("_")[0]}_{version}'

        formats = self._extract_m3u8_formats(
            f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8',
            video_id, ext='mp4', m3u8_id='hls')

        # parsed once and reused for both the last chapter's end time and the result
        duration = parse_duration(base_values.get('r_duration'))

        chapters = None
        # chapters are only emitted when every start time has a matching title
        if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
            start_time = chapter_durations
            # each chapter ends where the next one starts; the last ends at the total duration
            end_time = chapter_durations[1:] + [duration]
            chapters = [{
                'start_time': s,
                'end_time': e,
                'title': t,
            } for s, e, t in zip(start_time, end_time, chapter_titles)]

        return {
            'id': video_id,
            'title': program_values.get('name'),
            'duration': duration,
            # upload time is optional metadata: a page without r_upload must not abort extraction
            'timestamp': unified_timestamp(base_values.get('r_upload')),
            'formats': formats,
            'chapters': chapters,
        }
287 | ||
288 | ||
class NhkForSchoolSubjectIE(InfoExtractor):
    # Playlist extractor for an NHK for School subject portal page.

    IE_DESC = 'Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)'
    KNOWN_SUBJECTS = (
        'rika', 'syakai', 'kokugo',
        'sansuu', 'seikatsu', 'doutoku',
        'ongaku', 'taiiku', 'zukou',
        'gijutsu', 'katei', 'sougou',
        'eigo', 'tokkatsu',
        'tokushi', 'sonota',
    )
    # only URLs whose path is exactly one of the known subject names are accepted
    _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>%s)/?(?:[\?#].*)?$' % '|'.join(map(re.escape, KNOWN_SUBJECTS))

    _TESTS = [{
        'url': 'https://www.nhk.or.jp/school/sougou/',
        'info_dict': {
            'id': 'sougou',
            'title': '総合的な学習の時間',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://www.nhk.or.jp/school/rika/',
        'info_dict': {
            'id': 'rika',
            'title': '理科',
        },
        'playlist_mincount': 15,
    }]

    def _real_extract(self, url):
        """Collect every link under /school/<subject>/ found on the subject page."""
        subject_id = self._match_id(url)
        webpage = self._download_webpage(url, subject_id)

        program_links = re.finditer(
            rf'href="((?:https?://www\.nhk\.or\.jp)?/school/{re.escape(subject_id)}/[^/]+/)"', webpage)
        subject_title = self._html_search_regex(
            r'(?s)<span\s+class="subjectName">\s*<img\s*[^<]+>\s*([^<]+?)</span>', webpage, 'title', fatal=False)

        def to_absolute_url(link_match):
            # links may be site-relative, so resolve them against the page URL
            return urljoin(url, link_match.group(1))

        return self.playlist_from_matches(
            program_links, subject_id, subject_title, to_absolute_url)
326 | ||
327 | ||
class NhkForSchoolProgramListIE(InfoExtractor):
    # Playlist extractor for a single NHK for School program under a known subject.

    _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>(?:%s)/[a-zA-Z0-9_-]+)' % (
        '|'.join(map(re.escape, NhkForSchoolSubjectIE.KNOWN_SUBJECTS))
    )
    _TESTS = [{
        'url': 'https://www.nhk.or.jp/school/sougou/q/',
        'info_dict': {
            'id': 'sougou/q',
            'title': 'Q~こどものための哲学',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Build a playlist of bangumi pages from the program's meta/program.json."""
        program_id = self._match_id(url)

        webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)

        # try the generic page title first, then fall back to the <h3> heading
        title = self._generic_title('', webpage)
        if not title:
            title = self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False)
        if title:
            # drop the trailing "| NHK for School" site suffix
            title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title)
        else:
            title = None

        description = self._html_search_regex(
            r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',
            webpage, 'description', fatal=False, group=0)

        bangumi_list = self._download_json(
            f'https://www.nhk.or.jp/school/{program_id}/meta/program.json', program_id)
        das_ids = traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []
        # they're always bangumi
        bangumis = []
        for das_id in das_ids:
            bangumis.append(self.url_result(
                f'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id={das_id}'))

        return self.playlist_result(bangumis, program_id, title, description)
8f0be90e | 361 | |
362 | ||
class NhkRadiruIE(InfoExtractor):
    # Extractor for NHK Radiru on-demand episodes and programme playlists.

    _GEO_COUNTRIES = ['JP']
    IE_DESC = 'NHK らじる (Radiru/Rajiru)'
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
    _TESTS = [{
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
        'skip': 'Episode expired on 2023-04-16',
        'info_dict': {
            'channel': 'NHK-FM',
            'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
            'ext': 'm4a',
            'id': '0449_01_3853544',
            'series': 'ジャズ・トゥナイト',
            'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
            'timestamp': 1680969600,
            'title': 'ジャズ・トゥナイト NEWジャズ特集',
            'upload_date': '20230408',
            'release_timestamp': 1680962400,
            'release_date': '20230408',
            'was_live': True,
        },
    }, {
        # playlist, airs every weekday so it should _hopefully_ be okay forever
        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
        'info_dict': {
            'id': '0458_01',
            'title': 'ベストオブクラシック',
            'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
            'channel': 'NHK-FM',
            'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
        },
        'playlist_mincount': 3,
    }, {
        # one with letters in the id
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
        'note': 'Expires on 2024-03-31',
        'info_dict': {
            'id': 'F300_06_3738470',
            'ext': 'm4a',
            'title': '有島武郎「一房のぶどう」',
            'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)',
            'channel': 'NHKラジオ第1、NHK-FM',
            'timestamp': 1635757200,
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
            'release_date': '20161207',
            'series': 'らじる文庫 by ラジオ深夜便 ',
            'release_timestamp': 1481126700,
            'upload_date': '20211101',
        }
    }, {
        # news
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
        'skip': 'Expires on 2023-04-17',
        'info_dict': {
            'id': 'F261_01_3855109',
            'ext': 'm4a',
            'channel': 'NHKラジオ第1',
            'timestamp': 1681635900,
            'release_date': '20230416',
            'series': 'NHKラジオニュース',
            'title': '午後6時のNHKニュース',
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
            'upload_date': '20230416',
            'release_timestamp': 1681635600,
        },
    }]

    def _extract_episode_info(self, headline, programme_id, series_meta):
        """Build the info dict for one headline (episode) of a programme.

        headline is one item of the programme JSON's detail_list and
        series_meta holds the programme-level title/channel/thumbnail.
        Formats come from the first file_list entry; if that entry or its
        file_name is missing, the result has an empty format list.
        """
        episode_id = f'{programme_id}_{headline["headline_id"]}'
        # fall back to {} so a headline without a usable file entry does not crash on None
        episode = traverse_obj(headline, ('file_list', 0, {dict})) or {}
        file_name = episode.get('file_name')

        return {
            **series_meta,
            'id': episode_id,
            'formats': self._extract_m3u8_formats(file_name, episode_id, fatal=False) if file_name else [],
            'container': 'm4a_dash',  # force fixup, AAC-only HLS
            'was_live': True,
            'series': series_meta.get('title'),
            'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
            **traverse_obj(episode, {
                'title': 'file_title',
                'description': 'file_title_sub',
                'timestamp': ('open_time', {unified_timestamp}),
                'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
            }),
        }

    def _real_extract(self, url):
        """Extract a single episode when the URL names a headline, else the whole programme.

        Raises ExtractorError if the requested headline is not listed in the
        programme JSON.
        """
        site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
        programme_id = f'{site_id}_{corner_id}'

        # site F261 (radio news) is served from a different JSON endpoint
        if site_id == 'F261':
            json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
        else:
            json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'

        meta = self._download_json(json_url, programme_id)['main']

        series_meta = traverse_obj(meta, {
            'title': 'program_name',
            'channel': 'media_name',
            'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
        }, get_all=False)

        if headline_id:
            headline = traverse_obj(meta, (
                'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False)
            if not headline:
                # the requested headline is not in the listing; report it
                # instead of failing later with a TypeError on None
                raise ExtractorError(
                    'Content not found; it has most likely expired', expected=True)
            return self._extract_episode_info(headline, programme_id, series_meta)

        def entries():
            for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
                yield self._extract_episode_info(headline, programme_id, series_meta)

        return self.playlist_result(
            entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta)
479 | ||
480 | ||
class NhkRadioNewsPageIE(InfoExtractor):
    # Redirects the radio news landing page to the matching NhkRadiruIE programme.

    _VALID_URL = r'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
    _TESTS = [{
        # airs daily, on-the-hour most hours
        'url': 'https://www.nhk.or.jp/radionews/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'F261_01',
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
            'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
            'channel': 'NHKラジオ第1',
            'title': 'NHKラジオニュース',
        }
    }]

    def _real_extract(self, url):
        """Delegate to NhkRadiruIE using the fixed F261_01 programme URL."""
        news_programme_url = 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01'
        return self.url_result(news_programme_url, NhkRadiruIE)
81c8b9bd | 498 | |
499 | ||
class NhkRadiruLiveIE(InfoExtractor):
    # Extractor for NHK Radiru live radio streams (r1, r2, fm).

    _GEO_COUNTRIES = ['JP']
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
    _TESTS = [{
        # radio 1, no area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
        'info_dict': {
            'id': 'r1-tokyo',
            'title': 're:^NHKネットラジオ第1 東京.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # radio 2, area specified
        # (the area doesnt actually matter, r2 is national)
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
        'info_dict': {
            'id': 'r2-fukuoka',
            'title': 're:^NHKネットラジオ第2 福岡.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # fm, area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
        'info_dict': {
            'id': 'fm-sapporo',
            'title': 're:^NHKネットラジオFM 札幌.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
            'live_status': 'is_live',
        }
    }]

    # maps the station ID from the URL to the key used in the now-on-air JSON
    _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}

    def _real_extract(self, url):
        """Resolve the live stream and now-on-air metadata for a station and area.

        The area is taken from the 'area' extractor argument and defaults to
        'tokyo'. Raises ExtractorError when the area is not listed in the
        site's configuration XML.
        """
        station = self._match_id(url)
        area = self._configuration_arg('area', ['tokyo'])[0]

        config = self._download_xml(
            'https://www.nhk.or.jp/radio/config/config_web.xml', station, 'Downloading area information')
        # select the parent element of the <area> node whose text equals the requested area
        data = config.find(f'.//data//area[.="{area}"]/..')

        # find() returns None when nothing matches; an Element's truth value
        # reflects its child count and testing it is deprecated, so compare
        # against None explicitly
        if data is None:
            raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join(
                [i.text for i in config.findall('.//data//area')]), expected=True)

        # the configured URL is protocol-relative and contains an {area} placeholder
        noa_info = self._download_json(
            f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
            station, note=f'Downloading {area} station metadata')
        present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))

        return {
            'title': ' '.join(traverse_obj(present_info, (('service', 'area',), 'name', {str}))),
            'id': join_nonempty(station, area),
            'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
                'url': 'url',
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })),
            'formats': self._extract_m3u8_formats(data.find(f'{station}hls').text, station),
            'is_live': True,
        }