3 from .common
import InfoExtractor
22 class NhkBaseIE(InfoExtractor
):
23 _API_URL_TEMPLATE
= 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
24 _BASE_URL_REGEX
= r
'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
26 def _call_api(self
, m_id
, lang
, is_video
, is_episode
, is_clip
):
27 return self
._download
_json
(
28 self
._API
_URL
_TEMPLATE
% (
29 'v' if is_video
else 'r',
30 'clip' if is_clip
else 'esd',
31 'episode' if is_episode
else 'program',
32 m_id
, lang
, '/all' if is_video
else ''),
33 m_id
, query
={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'}
)['data']['episodes'] or []
35 def _get_api_info(self
, refresh
=True):
37 return self
.cache
.load('nhk', 'api_info')
39 self
.cache
.store('nhk', 'api_info', {})
40 movie_player_js
= self
._download
_webpage
(
41 'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
42 note
='Downloading stream API information')
44 'url': self
._search
_regex
(
45 r
'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
46 'token': self._search_regex(
47 r'prod:[^;]+\btoken:\s*[\'"]([^
\'"]+)[\'"]', movie_player_js, None, 'stream API token
'),
49 self.cache.store('nhk
', 'api_info
', api_info)
52 def _extract_stream_info(self, vod_id):
53 for refresh in (False, True):
54 api_info = self._get_api_info(refresh)
58 api_url = api_info.pop('url
')
61 api_url, vod_id, 'Downloading stream url info
', fatal=False, query={
64 'optional_id
': vod_id,
67 stream_url = traverse_obj(
68 meta, ('movie_url
', ('mb_auto
', 'auto_sp
', 'auto_pc
'), {url_or_none}), get_all=False)
71 formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
73 **traverse_obj(meta, {
74 'duration
': ('duration
', {int_or_none}),
75 'timestamp
': ('publication_date
', {unified_timestamp}),
76 'release_timestamp
': ('insert_date
', {unified_timestamp}),
77 'modified_timestamp
': ('update_date
', {unified_timestamp}),
80 'subtitles
': subtitles,
82 raise ExtractorError('Unable to extract stream url
')
84 def _extract_episode_info(self, url, episode=None):
85 fetch_episode = episode is None
86 lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang
', 'type', 'id')
87 is_video = m_type != 'audio
'
90 episode_id = episode_id[:4] + '-' + episode_id[4:]
93 episode = self._call_api(
94 episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
96 def get_clean_field(key):
97 return clean_html(episode.get(key + '_clean
') or episode.get(key))
99 title = get_clean_field('sub_title
')
100 series = get_clean_field('title
')
103 for s, w, h in [('', 640, 360), ('_l
', 1280, 720)]:
104 img_path = episode.get('image
' + s)
111 'url
': 'https
://www3
.nhk
.or.jp
' + img_path,
116 title = f'{series}
- {title}
'
117 elif series and not title:
121 else: # title, no series
125 'id': episode_id + '-' + lang,
127 'description
': get_clean_field('description
'),
128 'thumbnails
': thumbnails,
130 'episode
': episode_name,
134 vod_id = episode['vod_id
']
136 **self._extract_stream_info(vod_id),
142 # From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
143 audio_path = remove_end(episode['audio
']['audio
'], '.m4a
')
144 info['formats
'] = self._extract_m3u8_formats(
145 f'{urljoin("https://vod-stream.nhk.jp", audio_path)}
/index
.m3u8
',
146 episode_id, 'm4a
', entry_protocol='m3u8_native
',
147 m3u8_id='hls
', fatal=False)
148 for f in info['formats
']:
152 '_type
': 'url_transparent
',
153 'ie_key
': NhkVodIE.ie_key(),
159 class NhkVodIE(NhkBaseIE):
161 rf'{NhkBaseIE._BASE_URL_REGEX}shows
/(?
:(?P
<type>video
)/)?
(?P
<id>\d{{4}
}[\da
-z
]\d
+)/?
(?
:$|
[?
#])',
162 rf
'{NhkBaseIE._BASE_URL_REGEX}(?:ondemand|shows)/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[\da-z]+)',
163 rf
'{NhkBaseIE._BASE_URL_REGEX}ondemand/(?P<type>video)/(?P<id>\d{{4}}[\da-z]\d+)', # deprecated
165 # Content available only for a limited period of time. Visit
166 # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
168 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
170 'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
172 'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
173 'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
174 'thumbnail': r
're:https://.+/.+\.jpg',
175 'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
176 'series': 'Japan Railway Journal',
177 'modified_timestamp': 1707217907,
178 'timestamp': 1681428600,
179 'release_timestamp': 1693883728,
181 'upload_date': '20230413',
182 'modified_date': '20240206',
183 'release_date': '20230905',
187 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
188 'md5': '153c3016dfd252ba09726588149cf0e7',
190 'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
192 'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
193 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
194 'thumbnail': r
're:https://.+/.+\.jpg',
195 'series': 'Dining with the Chef',
196 'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
198 'upload_date': '20190816',
199 'release_date': '20230902',
200 'release_timestamp': 1693619292,
201 'modified_timestamp': 1707217907,
202 'modified_date': '20240206',
203 'timestamp': 1565997540,
207 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
209 'id': 'livinginjapan-20231001-1-en',
211 'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
212 'series': 'Living in Japan',
213 'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
214 'thumbnail': r
're:https://.+/.+\.jpg',
215 'episode': 'Tips for Travelers to Japan / Ramen Vending Machines',
218 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
219 'only_matching': True,
221 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
222 'only_matching': True,
224 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
225 'only_matching': True,
227 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
228 'only_matching': True,
230 # video, alphabetic character in ID #29670
231 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
235 'title': 'DESIGN TALKS plus - Fishermen’s Finery',
236 'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
237 'thumbnail': r
're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
238 'upload_date': '20210615',
239 'timestamp': 1623722008,
241 'skip': '404 Not Found',
243 # japanese-language, longer id than english
244 'url': 'https://www3.nhk.or.jp/nhkworld/ja/ondemand/video/0020271111/',
246 'id': 'nw_ja_v_jvod_ohayou_20231008',
248 'title': 'おはよう日本(7時台) - 10月8日放送',
249 'series': 'おはよう日本(7時台)',
250 'episode': '10月8日放送',
251 'thumbnail': r
're:https://.+/.+\.jpg',
252 'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
254 'skip': 'expires 2023-10-15',
256 # a one-off (single-episode series). title from the api is just '<p></p>'
257 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
259 'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
261 'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
262 'description': 'md5:5db620c46a0698451cc59add8816b797',
263 'thumbnail': r
're:https://.+/.+\.jpg',
264 'release_date': '20230905',
265 'timestamp': 1690103400,
267 'release_timestamp': 1693898699,
268 'upload_date': '20230723',
269 'modified_timestamp': 1707217907,
270 'modified_date': '20240206',
271 'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
272 'series': 'Barakan Discovers',
275 # /ondemand/video/ url with alphabetical character in 5th position of id
276 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
278 'id': 'nw_c_en_9999-a07',
280 'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
281 'series': 'Mini-Dramas on SDGs',
282 'modified_date': '20240206',
283 'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
284 'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
285 'timestamp': 1621962360,
287 'release_date': '20230903',
288 'modified_timestamp': 1707217907,
289 'upload_date': '20210525',
290 'thumbnail': r
're:https://.+/.+\.jpg',
291 'release_timestamp': 1693713487,
294 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
296 'id': 'nw_c_en_9999-d17',
298 'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
299 'description': 'Today’s focus: Snow',
300 'release_timestamp': 1693792402,
301 'release_date': '20230904',
302 'upload_date': '20220128',
303 'timestamp': 1643370960,
304 'thumbnail': r
're:https://.+/.+\.jpg',
307 'modified_date': '20240206',
308 'modified_timestamp': 1707217907,
311 # new /shows/ url format
312 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
314 'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
316 'title': 'Japanology Plus - 20th Anniversary Special Part 1',
317 'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
318 'episode': '20th Anniversary Special Part 1',
319 'series': 'Japanology Plus',
320 'thumbnail': r
're:https://.+/.+\.jpg',
322 'timestamp': 1711020600,
323 'upload_date': '20240321',
324 'release_timestamp': 1711022683,
325 'release_date': '20240321',
326 'modified_timestamp': 1711031012,
327 'modified_date': '20240321',
330 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
332 'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
334 'title': '100 Ideas to Save the World - Working Styles Evolve',
335 'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
336 'episode': 'Working Styles Evolve',
337 'series': '100 Ideas to Save the World',
338 'thumbnail': r
're:https://.+/.+\.jpg',
340 'upload_date': '20230325',
341 'timestamp': 1679755200,
342 'release_date': '20230905',
343 'release_timestamp': 1693880540,
344 'modified_date': '20240206',
345 'modified_timestamp': 1707217907,
348 # new /shows/audio/ url format
349 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20231001-1/',
350 'only_matching': True,
352 # valid url even if can't be found in wild; support needed for clip entries extraction
353 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/9999o80/',
354 'only_matching': True,
357 def _real_extract(self
, url
):
358 return self
._extract
_episode
_info
(url
)
361 class NhkVodProgramIE(NhkBaseIE
):
362 _VALID_URL
= rf
'''(?x)
363 {NhkBaseIE._BASE_URL_REGEX}(?:shows|tv)/
364 (?:(?P<type>audio)/programs/)?(?P<id>\w+)/?
365 (?:\?(?:[^#]+&)?type=(?P<episode_type>clip|(?:radio|tv)Episode))?'''
367 # video program episodes
368 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/sumo/',
371 'title': 'GRAND SUMO Highlights',
372 'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
374 'playlist_mincount': 1,
376 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/',
378 'id': 'japanrailway',
379 'title': 'Japan Railway Journal',
380 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
382 'playlist_mincount': 12,
384 # video program clips
385 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/?type=clip',
387 'id': 'japanrailway',
388 'title': 'Japan Railway Journal',
389 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
391 'playlist_mincount': 12,
394 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/programs/livinginjapan/',
396 'id': 'livinginjapan',
397 'title': 'Living in Japan',
398 'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
400 'playlist_mincount': 12,
403 'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
405 'id': 'designtalksplus',
406 'title': 'DESIGN TALKS plus',
407 'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
409 'playlist_mincount': 20,
411 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
412 'only_matching': True,
def suitable(cls, url):
    """Decide whether this extractor should take ``url``.

    Any URL that ``NhkVodIE.suitable`` accepts is declined here; every other
    URL is decided by the parent class's ``suitable``.
    """
    if NhkVodIE.suitable(url):
        return False
    return super().suitable(url)
419 def _extract_meta_from_class_elements(self
, class_values
, html
):
420 for class_value
in class_values
:
421 if value
:= clean_html(get_element_by_class(class_value
, html
)):
424 def _real_extract(self
, url
):
425 lang
, m_type
, program_id
, episode_type
= self
._match
_valid
_url
(url
).group('lang', 'type', 'id', 'episode_type')
426 episodes
= self
._call
_api
(
427 program_id
, lang
, m_type
!= 'audio', False, episode_type
== 'clip')
430 for episode
in episodes
:
431 if episode_path
:= episode
.get('url'):
432 yield self
._extract
_episode
_info
(urljoin(url
, episode_path
), episode
)
434 html
= self
._download
_webpage
(url
, program_id
)
435 program_title
= self
._extract
_meta
_from
_class
_elements
([
436 'p-programDetail__title', # /ondemand/program/
437 'pProgramHero__logoText', # /shows/
438 'tAudioProgramMain__title', # /shows/audio/programs/
439 'p-program-name'], html
) # /tv/
440 program_description
= self
._extract
_meta
_from
_class
_elements
([
441 'p-programDetail__text', # /ondemand/program/
442 'pProgramHero__description', # /shows/
443 'tAudioProgramMain__info', # /shows/audio/programs/
444 'p-program-description'], html
) # /tv/
446 return self
.playlist_result(entries(), program_id
, program_title
, program_description
)
449 class NhkForSchoolBangumiIE(InfoExtractor
):
450 _VALID_URL
= r
'https?://www2\.nhk\.or\.jp/school/movie/(?P<type>bangumi|clip)\.cgi\?das_id=(?P<id>[a-zA-Z0-9_-]+)'
452 'url': 'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id=D0005150191_00000',
454 'id': 'D0005150191_00003',
457 'timestamp': 1396414800,
459 'upload_date': '20140402',
462 'chapters': 'count:12',
466 'skip_download': True,
470 def _real_extract(self
, url
):
471 program_type
, video_id
= self
._match
_valid
_url
(url
).groups()
473 webpage
= self
._download
_webpage
(
474 f
'https://www2.nhk.or.jp/school/movie/{program_type}.cgi?das_id={video_id}', video_id
)
476 # searches all variables
477 base_values
= {g.group(1): g.group(2) for g in re.finditer(r'var\s+([a-zA-Z_]+)\s*=\s*"([^"]+?)";', webpage)}
478 # and programObj values too
479 program_values
= {g.group(1): g.group(3) for g in re.finditer(r'(?:program|clip)Obj\.([a-zA-Z_]+)\s*=\s*(["\'])([^"]+?)\2;', webpage)}
480 # extract all chapters
481 chapter_durations
= [parse_duration(g
.group(1)) for g
in re
.finditer(r
'chapterTime\.push\(\'([0-9:]+?
)\'\
);', webpage)]
482 chapter_titles = [' '.join([g.group(1) or '', unescapeHTML(g.group(2))]).strip() for g in re.finditer(r'<div
class="cpTitle"><span
>(scene\s
*\d
+)?
</span
>([^
<]+?
)</div
>', webpage)]
484 # this is how player_core.js is actually doing (!)
485 version = base_values.get('r_version
') or program_values.get('version
')
487 video_id = f'{video_id.split("_")[0]}_{version}
'
489 formats = self._extract_m3u8_formats(
490 f'https
://nhks
-vh
.akamaihd
.net
/i
/das
/{video_id[0:8]}
/{video_id}_V_000
.f4v
/master
.m3u8
',
491 video_id, ext='mp4
', m3u8_id='hls
')
493 duration = parse_duration(base_values.get('r_duration
'))
496 if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
497 start_time = chapter_durations
498 end_time = chapter_durations[1:] + [duration]
503 } for s, e, t in zip(start_time, end_time, chapter_titles)]
507 'title
': program_values.get('name
'),
508 'duration
': parse_duration(base_values.get('r_duration
')),
509 'timestamp
': unified_timestamp(base_values['r_upload
']),
511 'chapters
': chapters,
515 class NhkForSchoolSubjectIE(InfoExtractor):
516 IE_DESC = 'Portal page
for each school subjects
, like
Japanese (kokugo
, 国語
) or math (sansuu
/suugaku
or 算数・数学
)'
518 'rika
', 'syakai
', 'kokugo
',
519 'sansuu
', 'seikatsu
', 'doutoku
',
520 'ongaku
', 'taiiku
', 'zukou
',
521 'gijutsu
', 'katei
', 'sougou
',
525 _VALID_URL = r'https?
://www\
.nhk\
.or\
.jp
/school
/(?P
<id>{})/?
(?
:[\?#].*)?$'.format(
526 '|'.join(re
.escape(s
) for s
in KNOWN_SUBJECTS
))
529 'url': 'https://www.nhk.or.jp/school/sougou/',
532 'title': '総合的な学習の時間',
534 'playlist_mincount': 16,
536 'url': 'https://www.nhk.or.jp/school/rika/',
541 'playlist_mincount': 15,
544 def _real_extract(self
, url
):
545 subject_id
= self
._match
_id
(url
)
546 webpage
= self
._download
_webpage
(url
, subject_id
)
548 return self
.playlist_from_matches(
549 re
.finditer(rf
'href="((?:https?://www\.nhk\.or\.jp)?/school/{re.escape(subject_id)}/[^/]+/)"', webpage
),
551 self
._html
_search
_regex
(r
'(?s)<span\s+class="subjectName">\s*<img\s*[^<]+>\s*([^<]+?)</span>', webpage
, 'title', fatal
=False),
552 lambda g
: urljoin(url
, g
.group(1)))
555 class NhkForSchoolProgramListIE(InfoExtractor
):
556 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/school/(?P<id>(?:{})/[a-zA-Z0-9_-]+)'.format(
557 '|'.join(re
.escape(s
) for s
in NhkForSchoolSubjectIE
.KNOWN_SUBJECTS
))
559 'url': 'https://www.nhk.or.jp/school/sougou/q/',
562 'title': 'Q~こどものための哲学',
564 'playlist_mincount': 20,
567 def _real_extract(self
, url
):
568 program_id
= self
._match
_id
(url
)
570 webpage
= self
._download
_webpage
(f
'https://www.nhk.or.jp/school/{program_id}/', program_id
)
572 title
= (self
._generic
_title
('', webpage
)
573 or self
._html
_search
_regex
(r
'<h3>([^<]+?)とは?\s*</h3>', webpage
, 'title', fatal
=False))
574 title
= re
.sub(r
'\s*\|\s*NHK\s+for\s+School\s*$', '', title
) if title
else None
575 description
= self
._html
_search
_regex
(
576 r
'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',
577 webpage
, 'description', fatal
=False, group
=0)
579 bangumi_list
= self
._download
_json
(
580 f
'https://www.nhk.or.jp/school/{program_id}/meta/program.json', program_id
)
581 # they're always bangumi
583 self
.url_result(f
'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id={x}')
584 for x
in traverse_obj(bangumi_list
, ('part', ..., 'part-video-dasid')) or []]
586 return self
.playlist_result(bangumis
, program_id
, title
, description
)
589 class NhkRadiruIE(InfoExtractor
):
590 _GEO_COUNTRIES
= ['JP']
591 IE_DESC
= 'NHK らじる (Radiru/Rajiru)'
592 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
594 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
595 'skip': 'Episode expired on 2024-06-09',
597 'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
598 'id': '0449_01_4003239',
600 'uploader': 'NHK FM 東京',
601 'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
602 'series': 'ジャズ・トゥナイト',
603 'channel': 'NHK FM 東京',
604 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
605 'upload_date': '20240601',
606 'series_id': '0449_01',
607 'release_date': '20240601',
608 'timestamp': 1717257600,
609 'release_timestamp': 1717250400,
612 # playlist, airs every weekday so it should _hopefully_ be okay forever
613 'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
616 'title': 'ベストオブクラシック',
617 'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
618 'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
619 'series_id': '0458_01',
620 'uploader': 'NHK FM',
622 'series': 'ベストオブクラシック',
624 'playlist_mincount': 3,
626 # one with letters in the id
627 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
628 'note': 'Expires on 2025-03-31',
630 'id': 'F683_01_3910688',
632 'title': '夏目漱石「文鳥」第1回',
633 'series': '【らじる文庫】夏目漱石「文鳥」(全4回)',
634 'series_id': 'F683_01',
635 'description': '朗読:浅井理アナウンサー',
636 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
637 'upload_date': '20240106',
638 'release_date': '20240106',
639 'uploader': 'NHK R1',
640 'release_timestamp': 1704511800,
642 'timestamp': 1704512700,
644 'expected_warnings': ['Unable to download JSON metadata',
645 'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
648 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
650 'id': 'F261_01_4012173',
652 'channel': 'NHKラジオ第1',
653 'uploader': 'NHKラジオ第1',
654 'series': 'NHKラジオニュース',
655 'title': '午前0時のNHKニュース',
656 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
657 'release_timestamp': 1718290800,
658 'release_date': '20240613',
659 'timestamp': 1718291400,
660 'upload_date': '20240613',
663 # fallback when extended metadata fails
664 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
665 'skip': 'Expires on 2024-06-07',
667 'id': '2834_01_4009298',
668 'title': 'まち☆キラ!開成町特集',
670 'release_date': '20240531',
671 'upload_date': '20240531',
673 'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
674 'channel': 'NHK R1,FM',
676 'timestamp': 1717123800,
677 'uploader': 'NHK R1,FM',
678 'release_timestamp': 1717120800,
679 'series_id': '2834_01',
681 'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
686 def _extract_extended_metadata(self
, episode_id
, aa_vinfo
):
687 service
, _
, area
= traverse_obj(aa_vinfo
, (2, {str}
, {lambda x: (x or '').partition(',')}
))
688 detail_url
= try_call(
689 lambda: self
._API
_URL
_TMPL
.format(area
=area
, service
=service
, dateid
=aa_vinfo
[3]))
693 response
= self
._download
_json
(
694 detail_url
, episode_id
, 'Downloading extended metadata',
695 'Failed to download extended metadata', fatal
=False, expected_status
=400)
699 if error
:= traverse_obj(response
, ('error', {dict}
)):
701 'Failed to get extended metadata. API returned '
702 f
'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
705 full_meta
= traverse_obj(response
, ('list', service
, 0, {dict}
))
707 self
.report_warning('Failed to get extended metadata. API returned empty list.')
710 station
= ' '.join(traverse_obj(full_meta
, (('service', 'area'), 'name', {str}
))) or None
713 'preference': 1 if id_
.startswith('thumbnail') else -2 if id_
.startswith('logo') else -1,
714 **traverse_obj(thumb
, {
716 'width': ('width', {int_or_none}
),
717 'height': ('height', {int_or_none}
),
719 } for id_
, thumb
in traverse_obj(full_meta
, ('images', {dict.items}
, lambda _
, v
: v
[1]['url']))]
724 'description': join_nonempty(
725 'subtitle', 'content', 'act', 'music', delim
='\n\n', from_dict
=full_meta
),
726 'thumbnails': thumbnails
,
727 **traverse_obj(full_meta
, {
728 'title': ('title', {str}
),
729 'timestamp': ('end_time', {unified_timestamp}
),
730 'release_timestamp': ('start_time', {unified_timestamp}
),
734 def _extract_episode_info(self
, episode
, programme_id
, series_meta
):
735 episode_id
= f
'{programme_id}_{episode["id"]}'
736 aa_vinfo
= traverse_obj(episode
, ('aa_contents_id', {lambda x: x.split(';')}
))
737 extended_metadata
= self
._extract
_extended
_metadata
(episode_id
, aa_vinfo
)
738 fallback_start_time
, _
, fallback_end_time
= traverse_obj(
739 aa_vinfo
, (4, {str}
, {lambda x: (x or '').partition('_')}
))
744 'formats': self
._extract
_m
3u8_formats
(episode
.get('stream_url'), episode_id
, fatal
=False),
745 'container': 'm4a_dash', # force fixup, AAC-only HLS
747 'title': episode
.get('program_title'),
748 'description': episode
.get('program_sub_title'), # fallback
749 'timestamp': unified_timestamp(fallback_end_time
),
750 'release_timestamp': unified_timestamp(fallback_start_time
),
754 def _extract_news_info(self
, headline
, programme_id
, series_meta
):
755 episode_id
= f
'{programme_id}_{headline["headline_id"]}'
756 episode
= traverse_obj(headline
, ('file_list', 0, {dict}
))
761 'formats': self
._extract
_m
3u8_formats
(episode
.get('file_name'), episode_id
, fatal
=False),
762 'container': 'm4a_dash', # force fixup, AAC-only HLS
764 'series': series_meta
.get('title'),
765 'thumbnail': url_or_none(headline
.get('headline_image')) or series_meta
.get('thumbnail'),
766 **traverse_obj(episode
, {
767 'title': ('file_title', {str}
),
768 'description': ('file_title_sub', {str}
),
769 'timestamp': ('open_time', {unified_timestamp}
),
770 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}
, {unified_timestamp}
),
774 def _real_initialize(self
):
775 if self
._API
_URL
_TMPL
:
777 api_config
= self
._download
_xml
(
778 'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal
=False)
779 NhkRadiruIE
._API
_URL
_TMPL
= try_call(lambda: f
'https:{api_config.find(".//url_program_detail").text}')
781 def _real_extract(self
, url
):
782 site_id
, corner_id
, headline_id
= self
._match
_valid
_url
(url
).group('site', 'corner', 'headline')
783 programme_id
= f
'{site_id}_{corner_id}'
785 if site_id
== 'F261': # XXX: News programmes use old API (for now?)
786 meta
= self
._download
_json
(
787 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id
)['main']
788 series_meta
= traverse_obj(meta
, {
789 'title': ('program_name', {str}
),
790 'channel': ('media_name', {str}
),
791 'uploader': ('media_name', {str}
),
792 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}
),
796 headline
= traverse_obj(
797 meta
, ('detail_list', lambda _
, v
: v
['headline_id'] == headline_id
, any
))
799 raise ExtractorError('Content not found; it has most likely expired', expected
=True)
800 return self
._extract
_news
_info
(headline
, programme_id
, series_meta
)
803 for headline
in traverse_obj(meta
, ('detail_list', ..., {dict}
)):
804 yield self
._extract
_news
_info
(headline
, programme_id
, series_meta
)
806 return self
.playlist_result(
807 news_entries(), programme_id
, description
=meta
.get('site_detail'), **series_meta
)
809 meta
= self
._download
_json
(
810 'https://www.nhk.or.jp/radio-api/app/v1/web/ondemand/series', programme_id
, query
={
812 'corner_site_id': corner_id
,
815 fallback_station
= join_nonempty('NHK', traverse_obj(meta
, ('radio_broadcast', {str}
)), delim
=' ')
817 'series': join_nonempty('title', 'corner_name', delim
=' ', from_dict
=meta
),
818 'series_id': programme_id
,
819 'thumbnail': traverse_obj(meta
, ('thumbnail_url', {url_or_none}
)),
820 'channel': fallback_station
,
821 'uploader': fallback_station
,
825 episode
= traverse_obj(meta
, ('episodes', lambda _
, v
: v
['id'] == int(headline_id
), any
))
827 raise ExtractorError('Content not found; it has most likely expired', expected
=True)
828 return self
._extract
_episode
_info
(episode
, programme_id
, series_meta
)
831 for episode
in traverse_obj(meta
, ('episodes', ..., {dict}
)):
832 yield self
._extract
_episode
_info
(episode
, programme_id
, series_meta
)
834 return self
.playlist_result(
835 entries(), programme_id
, title
=series_meta
.get('series'),
836 description
=meta
.get('series_description'), **series_meta
)
839 class NhkRadioNewsPageIE(InfoExtractor
):
840 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
842 # airs daily, on-the-hour most hours
843 'url': 'https://www.nhk.or.jp/radionews/',
844 'playlist_mincount': 5,
847 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
848 'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
849 'channel': 'NHKラジオ第1',
850 'uploader': 'NHKラジオ第1',
851 'title': 'NHKラジオニュース',
def _real_extract(self, url):
    """Hand off to ``NhkRadiruIE`` using the fixed ``F261_01`` on-demand detail URL."""
    ondemand_url = 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01'
    return self.url_result(ondemand_url, NhkRadiruIE)
859 class NhkRadiruLiveIE(InfoExtractor
):
860 _GEO_COUNTRIES
= ['JP']
861 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
863 # radio 1, no area specified
864 'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
867 'title': 're:^NHKネットラジオ第1 東京.+$',
869 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
870 'live_status': 'is_live',
873 # radio 2, area specified
874 # (the area doesn't actually matter, r2 is national)
875 'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
876 'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}
}},
879 'title': 're:^NHKネットラジオ第2 福岡.+$',
881 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
882 'live_status': 'is_live',
886 'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
887 'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}
}},
890 'title': 're:^NHKネットラジオFM 札幌.+$',
892 'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
893 'live_status': 'is_live',
897 _NOA_STATION_IDS
= {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
899 def _real_extract(self
, url
):
900 station
= self
._match
_id
(url
)
901 area
= self
._configuration
_arg
('area', ['tokyo'])[0]
903 config
= self
._download
_xml
(
904 'https://www.nhk.or.jp/radio/config/config_web.xml', station
, 'Downloading area information')
905 data
= config
.find(f
'.//data//area[.="{area}"]/..')
908 raise ExtractorError('Invalid area. Valid areas are: {}'.format(', '.join(
909 [i
.text
for i
in config
.findall('.//data//area')])), expected
=True)
911 noa_info
= self
._download
_json
(
912 f
'https:{config.find(".//url_program_noa").text}'.format(area
=data
.find('areakey').text
),
913 station
, note
=f
'Downloading {area} station metadata', fatal
=False)
914 present_info
= traverse_obj(noa_info
, ('nowonair_list', self
._NOA
_STATION
_IDS
.get(station
), 'present'))
917 'title': ' '.join(traverse_obj(present_info
, (('service', 'area'), 'name', {str}
))),
918 'id': join_nonempty(station
, area
),
919 'thumbnails': traverse_obj(present_info
, ('service', 'images', ..., {
921 'width': ('width', {int_or_none}
),
922 'height': ('height', {int_or_none}
),
924 'formats': self
._extract
_m
3u8_formats
(data
.find(f
'{station}hls').text
, station
),