3 from .common
import InfoExtractor
21 class NhkBaseIE(InfoExtractor
):
22 _API_URL_TEMPLATE
= 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
23 _BASE_URL_REGEX
= r
'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
25 def _call_api(self
, m_id
, lang
, is_video
, is_episode
, is_clip
):
26 return self
._download
_json
(
27 self
._API
_URL
_TEMPLATE
% (
28 'v' if is_video
else 'r',
29 'clip' if is_clip
else 'esd',
30 'episode' if is_episode
else 'program',
31 m_id
, lang
, '/all' if is_video
else ''),
32 m_id
, query
={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'}
)['data']['episodes'] or []
34 def _get_api_info(self
, refresh
=True):
36 return self
.cache
.load('nhk', 'api_info')
38 self
.cache
.store('nhk', 'api_info', {})
39 movie_player_js
= self
._download
_webpage
(
40 'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
41 note
='Downloading stream API information')
43 'url': self
._search
_regex
(
44 r
'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
45 'token': self._search_regex(
46 r'prod:[^;]+\btoken:\s*[\'"]([^
\'"]+)[\'"]', movie_player_js, None, 'stream API token
'),
48 self.cache.store('nhk
', 'api_info
', api_info)
51 def _extract_stream_info(self, vod_id):
52 for refresh in (False, True):
53 api_info = self._get_api_info(refresh)
57 api_url = api_info.pop('url
')
60 api_url, vod_id, 'Downloading stream url info
', fatal=False, query={
63 'optional_id
': vod_id,
66 stream_url = traverse_obj(
67 meta, ('movie_url
', ('mb_auto
', 'auto_sp
', 'auto_pc
'), {url_or_none}), get_all=False)
70 formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
72 **traverse_obj(meta, {
73 'duration
': ('duration
', {int_or_none}),
74 'timestamp
': ('publication_date
', {unified_timestamp}),
75 'release_timestamp
': ('insert_date
', {unified_timestamp}),
76 'modified_timestamp
': ('update_date
', {unified_timestamp}),
79 'subtitles
': subtitles,
81 raise ExtractorError('Unable to extract stream url
')
83 def _extract_episode_info(self, url, episode=None):
84 fetch_episode = episode is None
85 lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang
', 'type', 'id')
86 is_video = m_type != 'audio
'
89 episode_id = episode_id[:4] + '-' + episode_id[4:]
92 episode = self._call_api(
93 episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
95 def get_clean_field(key):
96 return clean_html(episode.get(key + '_clean
') or episode.get(key))
98 title = get_clean_field('sub_title
')
99 series = get_clean_field('title
')
102 for s, w, h in [('', 640, 360), ('_l
', 1280, 720)]:
103 img_path = episode.get('image
' + s)
110 'url
': 'https
://www3
.nhk
.or.jp
' + img_path,
115 title = f'{series}
- {title}
'
116 elif series and not title:
120 else: # title, no series
124 'id': episode_id + '-' + lang,
126 'description
': get_clean_field('description
'),
127 'thumbnails
': thumbnails,
129 'episode
': episode_name,
133 vod_id = episode['vod_id
']
135 **self._extract_stream_info(vod_id),
141 # From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
142 audio_path = remove_end(episode['audio
']['audio
'], '.m4a
')
143 info['formats
'] = self._extract_m3u8_formats(
144 f'{urljoin("https://vod-stream.nhk.jp", audio_path)}
/index
.m3u8
',
145 episode_id, 'm4a
', entry_protocol='m3u8_native
',
146 m3u8_id='hls
', fatal=False)
147 for f in info['formats
']:
151 '_type
': 'url_transparent
',
152 'ie_key
': NhkVodIE.ie_key(),
158 class NhkVodIE(NhkBaseIE):
160 rf'{NhkBaseIE._BASE_URL_REGEX}shows
/(?
:(?P
<type>video
)/)?
(?P
<id>\d{{4}
}[\da
-z
]\d
+)/?
(?
:$|
[?
#])',
161 rf
'{NhkBaseIE._BASE_URL_REGEX}(?:ondemand|shows)/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[\da-z]+)',
162 rf
'{NhkBaseIE._BASE_URL_REGEX}ondemand/(?P<type>video)/(?P<id>\d{{4}}[\da-z]\d+)', # deprecated
164 # Content available only for a limited period of time. Visit
165 # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
167 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
169 'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
171 'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
172 'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
173 'thumbnail': r
're:https://.+/.+\.jpg',
174 'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
175 'series': 'Japan Railway Journal',
176 'modified_timestamp': 1707217907,
177 'timestamp': 1681428600,
178 'release_timestamp': 1693883728,
180 'upload_date': '20230413',
181 'modified_date': '20240206',
182 'release_date': '20230905',
186 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
187 'md5': '153c3016dfd252ba09726588149cf0e7',
189 'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
191 'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
192 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
193 'thumbnail': r
're:https://.+/.+\.jpg',
194 'series': 'Dining with the Chef',
195 'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
197 'upload_date': '20190816',
198 'release_date': '20230902',
199 'release_timestamp': 1693619292,
200 'modified_timestamp': 1707217907,
201 'modified_date': '20240206',
202 'timestamp': 1565997540,
206 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
208 'id': 'livinginjapan-20231001-1-en',
210 'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
211 'series': 'Living in Japan',
212 'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
213 'thumbnail': r
're:https://.+/.+\.jpg',
214 'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
217 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
218 'only_matching': True,
220 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
221 'only_matching': True,
223 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
224 'only_matching': True,
226 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
227 'only_matching': True,
229 # video, alphabetic character in ID #29670
230 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
234 'title': 'DESIGN TALKS plus - Fishermen’s Finery',
235 'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
236 'thumbnail': r
're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
237 'upload_date': '20210615',
238 'timestamp': 1623722008,
240 'skip': '404 Not Found',
242 # japanese-language, longer id than english
243 'url': 'https://www3.nhk.or.jp/nhkworld/ja/ondemand/video/0020271111/',
245 'id': 'nw_ja_v_jvod_ohayou_20231008',
247 'title': 'おはよう日本(7時台) - 10月8日放送',
248 'series': 'おはよう日本(7時台)',
249 'episode': '10月8日放送',
250 'thumbnail': r
're:https://.+/.+\.jpg',
251 'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
253 'skip': 'expires 2023-10-15',
255 # a one-off (single-episode series). title from the api is just '<p></p>'
256 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
258 'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
260 'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
261 'description': 'md5:5db620c46a0698451cc59add8816b797',
262 'thumbnail': r
're:https://.+/.+\.jpg',
263 'release_date': '20230905',
264 'timestamp': 1690103400,
266 'release_timestamp': 1693898699,
267 'upload_date': '20230723',
268 'modified_timestamp': 1707217907,
269 'modified_date': '20240206',
270 'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
271 'series': 'Barakan Discovers',
274 # /ondemand/video/ url with alphabetical character in 5th position of id
275 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
277 'id': 'nw_c_en_9999-a07',
279 'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
280 'series': 'Mini-Dramas on SDGs',
281 'modified_date': '20240206',
282 'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
283 'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
284 'timestamp': 1621962360,
286 'release_date': '20230903',
287 'modified_timestamp': 1707217907,
288 'upload_date': '20210525',
289 'thumbnail': r
're:https://.+/.+\.jpg',
290 'release_timestamp': 1693713487,
293 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
295 'id': 'nw_c_en_9999-d17',
297 'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
298 'description': 'Today’s focus: Snow',
299 'release_timestamp': 1693792402,
300 'release_date': '20230904',
301 'upload_date': '20220128',
302 'timestamp': 1643370960,
303 'thumbnail': r
're:https://.+/.+\.jpg',
306 'modified_date': '20240206',
307 'modified_timestamp': 1707217907,
310 # new /shows/ url format
311 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
313 'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
315 'title': 'Japanology Plus - 20th Anniversary Special Part 1',
316 'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
317 'episode': '20th Anniversary Special Part 1',
318 'series': 'Japanology Plus',
319 'thumbnail': r
're:https://.+/.+\.jpg',
321 'timestamp': 1711020600,
322 'upload_date': '20240321',
323 'release_timestamp': 1711022683,
324 'release_date': '20240321',
325 'modified_timestamp': 1711031012,
326 'modified_date': '20240321',
329 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
331 'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
333 'title': '100 Ideas to Save the World - Working Styles Evolve',
334 'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
335 'episode': 'Working Styles Evolve',
336 'series': '100 Ideas to Save the World',
337 'thumbnail': r
're:https://.+/.+\.jpg',
339 'upload_date': '20230325',
340 'timestamp': 1679755200,
341 'release_date': '20230905',
342 'release_timestamp': 1693880540,
343 'modified_date': '20240206',
344 'modified_timestamp': 1707217907,
347 # new /shows/audio/ url format
348 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20231001-1/',
349 'only_matching': True,
351 # valid url even if can't be found in wild; support needed for clip entries extraction
352 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/9999o80/',
353 'only_matching': True,
356 def _real_extract(self
, url
):
357 return self
._extract
_episode
_info
(url
)
360 class NhkVodProgramIE(NhkBaseIE
):
361 _VALID_URL
= rf
'''(?x)
362 {NhkBaseIE._BASE_URL_REGEX}(?:shows|tv)/
363 (?:(?P<type>audio)/programs/)?(?P<id>\w+)/?
364 (?:\?(?:[^#]+&)?type=(?P<episode_type>clip|(?:radio|tv)Episode))?'''
366 # video program episodes
367 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/sumo/',
370 'title': 'GRAND SUMO Highlights',
371 'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
373 'playlist_mincount': 1,
375 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/',
377 'id': 'japanrailway',
378 'title': 'Japan Railway Journal',
379 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
381 'playlist_mincount': 12,
383 # video program clips
384 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/?type=clip',
386 'id': 'japanrailway',
387 'title': 'Japan Railway Journal',
388 'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
390 'playlist_mincount': 12,
393 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/programs/livinginjapan/',
395 'id': 'livinginjapan',
396 'title': 'Living in Japan',
397 'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
399 'playlist_mincount': 12,
402 'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
404 'id': 'designtalksplus',
405 'title': 'DESIGN TALKS plus',
406 'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
408 'playlist_mincount': 20,
410 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
411 'only_matching': True,
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that ``NhkVodIE`` accepts is rejected here; every other URL
    is judged by the parent class's ``suitable``.
    """
    if NhkVodIE.suitable(url):
        return False
    return super().suitable(url)
def _extract_meta_from_class_elements(self, class_values, html):
    """Return the first non-empty cleaned text found among several classes.

    Each entry of ``class_values`` is tried in order: the element is
    looked up in ``html`` with ``get_element_by_class`` and its content
    is passed through ``clean_html``. The first truthy result is
    returned, so callers should list the preferred class names first.

    Returns ``None`` when no candidate yields a truthy value.
    """
    for class_value in class_values:
        if value := clean_html(get_element_by_class(class_value, html)):
            return value
    return None
423 def _real_extract(self
, url
):
424 lang
, m_type
, program_id
, episode_type
= self
._match
_valid
_url
(url
).group('lang', 'type', 'id', 'episode_type')
425 episodes
= self
._call
_api
(
426 program_id
, lang
, m_type
!= 'audio', False, episode_type
== 'clip')
429 for episode
in episodes
:
430 if episode_path
:= episode
.get('url'):
431 yield self
._extract
_episode
_info
(urljoin(url
, episode_path
), episode
)
433 html
= self
._download
_webpage
(url
, program_id
)
434 program_title
= self
._extract
_meta
_from
_class
_elements
([
435 'p-programDetail__title', # /ondemand/program/
436 'pProgramHero__logoText', # /shows/
437 'tAudioProgramMain__title', # /shows/audio/programs/
438 'p-program-name'], html
) # /tv/
439 program_description
= self
._extract
_meta
_from
_class
_elements
([
440 'p-programDetail__text', # /ondemand/program/
441 'pProgramHero__description', # /shows/
442 'tAudioProgramMain__info', # /shows/audio/programs/
443 'p-program-description'], html
) # /tv/
445 return self
.playlist_result(entries(), program_id
, program_title
, program_description
)
448 class NhkForSchoolBangumiIE(InfoExtractor
):
449 _VALID_URL
= r
'https?://www2\.nhk\.or\.jp/school/movie/(?P<type>bangumi|clip)\.cgi\?das_id=(?P<id>[a-zA-Z0-9_-]+)'
451 'url': 'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id=D0005150191_00000',
453 'id': 'D0005150191_00003',
456 'timestamp': 1396414800,
458 'upload_date': '20140402',
461 'chapters': 'count:12'
465 'skip_download': True,
469 def _real_extract(self
, url
):
470 program_type
, video_id
= self
._match
_valid
_url
(url
).groups()
472 webpage
= self
._download
_webpage
(
473 f
'https://www2.nhk.or.jp/school/movie/{program_type}.cgi?das_id={video_id}', video_id
)
475 # searches all variables
476 base_values
= {g.group(1): g.group(2) for g in re.finditer(r'var\s+([a-zA-Z_]+)\s*=\s*"([^"]+?)";', webpage)}
477 # and programObj values too
478 program_values
= {g.group(1): g.group(3) for g in re.finditer(r'(?:program|clip)Obj\.([a-zA-Z_]+)\s*=\s*(["\'])([^"]+?)\2;', webpage)}
479 # extract all chapters
480 chapter_durations
= [parse_duration(g
.group(1)) for g
in re
.finditer(r
'chapterTime\.push\(\'([0-9:]+?
)\'\
);', webpage)]
481 chapter_titles = [' '.join([g.group(1) or '', unescapeHTML(g.group(2))]).strip() for g in re.finditer(r'<div
class="cpTitle"><span
>(scene\s
*\d
+)?
</span
>([^
<]+?
)</div
>', webpage)]
483 # this is how player_core.js is actually doing (!)
484 version = base_values.get('r_version
') or program_values.get('version
')
486 video_id = f'{video_id.split("_")[0]}_{version}
'
488 formats = self._extract_m3u8_formats(
489 f'https
://nhks
-vh
.akamaihd
.net
/i
/das
/{video_id[0:8]}
/{video_id}_V_000
.f4v
/master
.m3u8
',
490 video_id, ext='mp4
', m3u8_id='hls
')
492 duration = parse_duration(base_values.get('r_duration
'))
495 if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
496 start_time = chapter_durations
497 end_time = chapter_durations[1:] + [duration]
502 } for s, e, t in zip(start_time, end_time, chapter_titles)]
506 'title
': program_values.get('name
'),
507 'duration
': parse_duration(base_values.get('r_duration
')),
508 'timestamp
': unified_timestamp(base_values['r_upload
']),
510 'chapters
': chapters,
514 class NhkForSchoolSubjectIE(InfoExtractor):
515 IE_DESC = 'Portal page
for each school subjects
, like
Japanese (kokugo
, 国語
) or math (sansuu
/suugaku
or 算数・数学
)'
517 'rika
', 'syakai
', 'kokugo
',
518 'sansuu
', 'seikatsu
', 'doutoku
',
519 'ongaku
', 'taiiku
', 'zukou
',
520 'gijutsu
', 'katei
', 'sougou
',
524 _VALID_URL = r'https?
://www\
.nhk\
.or\
.jp
/school
/(?P
<id>%s)/?
(?
:[\?#].*)?$' % '|'.join(re.escape(s) for s in KNOWN_SUBJECTS)
527 'url': 'https://www.nhk.or.jp/school/sougou/',
530 'title': '総合的な学習の時間',
532 'playlist_mincount': 16,
534 'url': 'https://www.nhk.or.jp/school/rika/',
539 'playlist_mincount': 15,
542 def _real_extract(self
, url
):
543 subject_id
= self
._match
_id
(url
)
544 webpage
= self
._download
_webpage
(url
, subject_id
)
546 return self
.playlist_from_matches(
547 re
.finditer(rf
'href="((?:https?://www\.nhk\.or\.jp)?/school/{re.escape(subject_id)}/[^/]+/)"', webpage
),
549 self
._html
_search
_regex
(r
'(?s)<span\s+class="subjectName">\s*<img\s*[^<]+>\s*([^<]+?)</span>', webpage
, 'title', fatal
=False),
550 lambda g
: urljoin(url
, g
.group(1)))
553 class NhkForSchoolProgramListIE(InfoExtractor
):
554 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/school/(?P<id>(?:%s)/[a-zA-Z0-9_-]+)' % (
555 '|'.join(re
.escape(s
) for s
in NhkForSchoolSubjectIE
.KNOWN_SUBJECTS
)
558 'url': 'https://www.nhk.or.jp/school/sougou/q/',
561 'title': 'Q~こどものための哲学',
563 'playlist_mincount': 20,
def _real_extract(self, url):
    """Return a playlist of every ``bangumi`` video listed for a programme.

    The programme page provides the title and description; the list of
    videos comes from the programme's ``meta/program.json``. Each
    ``part-video-dasid`` found there becomes a ``url_result`` pointing at
    the matching ``bangumi.cgi`` page.
    """
    program_id = self._match_id(url)

    webpage = self._download_webpage(
        f'https://www.nhk.or.jp/school/{program_id}/', program_id)

    # Fall back to the page's <h3> heading when no generic title is found
    title = (self._generic_title('', webpage)
             or self._html_search_regex(
                 r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
    # Drop the trailing "| NHK for School" site suffix, if present
    title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
    # group=0 selects the whole match rather than a capture group
    description = self._html_search_regex(
        r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',
        webpage, 'description', fatal=False, group=0)

    bangumi_list = self._download_json(
        f'https://www.nhk.or.jp/school/{program_id}/meta/program.json', program_id)
    # they're always bangumi
    bangumis = [
        self.url_result(f'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id={x}')
        for x in traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []]

    return self.playlist_result(bangumis, program_id, title, description)
588 class NhkRadiruIE(InfoExtractor
):
589 _GEO_COUNTRIES
= ['JP']
590 IE_DESC
= 'NHK らじる (Radiru/Rajiru)'
591 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
593 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
594 'skip': 'Episode expired on 2024-02-24',
596 'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
597 'id': '0449_01_3926210',
599 'series': 'ジャズ・トゥナイト',
600 'uploader': 'NHK-FM',
602 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
603 'release_date': '20240217',
604 'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
605 'timestamp': 1708185600,
606 'release_timestamp': 1708178400,
607 'upload_date': '20240217',
610 # playlist, airs every weekday so it should _hopefully_ be okay forever
611 'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
614 'title': 'ベストオブクラシック',
615 'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
617 'uploader': 'NHK-FM',
618 'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
620 'playlist_mincount': 3,
622 # one with letters in the id
623 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
624 'note': 'Expires on 2024-03-31',
626 'id': 'F300_06_3738470',
628 'title': '有島武郎「一房のぶどう」',
629 'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)',
630 'channel': 'NHKラジオ第1、NHK-FM',
631 'uploader': 'NHKラジオ第1、NHK-FM',
632 'timestamp': 1635757200,
633 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
634 'release_date': '20161207',
635 'series': 'らじる文庫 by ラジオ深夜便 ',
636 'release_timestamp': 1481126700,
637 'upload_date': '20211101',
639 'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
642 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
643 'skip': 'Expires on 2023-04-17',
645 'id': 'F261_01_3855109',
647 'channel': 'NHKラジオ第1',
648 'uploader': 'NHKラジオ第1',
649 'timestamp': 1681635900,
650 'release_date': '20230416',
651 'series': 'NHKラジオニュース',
652 'title': '午後6時のNHKニュース',
653 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
654 'upload_date': '20230416',
655 'release_timestamp': 1681635600,
661 def _extract_extended_description(self
, episode_id
, episode
):
662 service
, _
, area
= traverse_obj(episode
, ('aa_vinfo2', {str}
, {lambda x: (x or '').partition(',')}
))
663 aa_vinfo3
= traverse_obj(episode
, ('aa_vinfo3', {str}
))
664 detail_url
= try_call(
665 lambda: self
._API
_URL
_TMPL
.format(service
=service
, area
=area
, dateid
=aa_vinfo3
))
669 full_meta
= traverse_obj(
670 self
._download
_json
(detail_url
, episode_id
, 'Downloading extended metadata', fatal
=False),
671 ('list', service
, 0, {dict}
)) or {}
672 return join_nonempty('subtitle', 'content', 'act', 'music', delim
='\n\n', from_dict
=full_meta
)
674 def _extract_episode_info(self
, headline
, programme_id
, series_meta
):
675 episode_id
= f
'{programme_id}_{headline["headline_id"]}'
676 episode
= traverse_obj(headline
, ('file_list', 0, {dict}
))
677 description
= self
._extract
_extended
_description
(episode_id
, episode
)
679 self
.report_warning('Failed to get extended description, falling back to summary')
680 description
= traverse_obj(episode
, ('file_title_sub', {str}
))
685 'formats': self
._extract
_m
3u8_formats
(episode
.get('file_name'), episode_id
, fatal
=False),
686 'container': 'm4a_dash', # force fixup, AAC-only HLS
688 'series': series_meta
.get('title'),
689 'thumbnail': url_or_none(headline
.get('headline_image')) or series_meta
.get('thumbnail'),
690 'description': description
,
691 **traverse_obj(episode
, {
692 'title': 'file_title',
693 'timestamp': ('open_time', {unified_timestamp}
),
694 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}
, {unified_timestamp}
),
698 def _real_initialize(self
):
699 if self
._API
_URL
_TMPL
:
701 api_config
= self
._download
_xml
(
702 'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal
=False)
703 NhkRadiruIE
._API
_URL
_TMPL
= try_call(lambda: f
'https:{api_config.find(".//url_program_detail").text}')
705 def _real_extract(self
, url
):
706 site_id
, corner_id
, headline_id
= self
._match
_valid
_url
(url
).group('site', 'corner', 'headline')
707 programme_id
= f
'{site_id}_{corner_id}'
709 if site_id
== 'F261':
710 json_url
= 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
712 json_url
= f
'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'
714 meta
= self
._download
_json
(json_url
, programme_id
)['main']
716 series_meta
= traverse_obj(meta
, {
717 'title': 'program_name',
718 'channel': 'media_name',
719 'uploader': 'media_name',
720 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}
),
724 return self
._extract
_episode
_info
(
726 'detail_list', lambda _
, v
: v
['headline_id'] == headline_id
), get_all
=False),
727 programme_id
, series_meta
)
730 for headline
in traverse_obj(meta
, ('detail_list', ..., {dict}
)):
731 yield self
._extract
_episode
_info
(headline
, programme_id
, series_meta
)
733 return self
.playlist_result(
734 entries(), programme_id
, playlist_description
=meta
.get('site_detail'), **series_meta
)
737 class NhkRadioNewsPageIE(InfoExtractor
):
738 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
740 # airs daily, on-the-hour most hours
741 'url': 'https://www.nhk.or.jp/radionews/',
742 'playlist_mincount': 5,
745 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
746 'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
747 'channel': 'NHKラジオ第1',
748 'uploader': 'NHKラジオ第1',
749 'title': 'NHKラジオニュース',
def _real_extract(self, url):
    """Hand the radio news page off to ``NhkRadiruIE``.

    The incoming ``url`` is not inspected: the result always points at the
    fixed on-demand page for programme ``F261_01``.
    """
    radiru_url = 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01'
    return self.url_result(radiru_url, NhkRadiruIE)
757 class NhkRadiruLiveIE(InfoExtractor
):
758 _GEO_COUNTRIES
= ['JP']
759 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
761 # radio 1, no area specified
762 'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
765 'title': 're:^NHKネットラジオ第1 東京.+$',
767 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
768 'live_status': 'is_live',
771 # radio 2, area specified
772 # (the area doesnt actually matter, r2 is national)
773 'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
774 'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}
}},
777 'title': 're:^NHKネットラジオ第2 福岡.+$',
779 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
780 'live_status': 'is_live',
784 'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
785 'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}
}},
788 'title': 're:^NHKネットラジオFM 札幌.+$',
790 'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
791 'live_status': 'is_live',
795 _NOA_STATION_IDS
= {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
797 def _real_extract(self
, url
):
798 station
= self
._match
_id
(url
)
799 area
= self
._configuration
_arg
('area', ['tokyo'])[0]
801 config
= self
._download
_xml
(
802 'https://www.nhk.or.jp/radio/config/config_web.xml', station
, 'Downloading area information')
803 data
= config
.find(f
'.//data//area[.="{area}"]/..')
806 raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join(
807 [i
.text
for i
in config
.findall('.//data//area')]), expected
=True)
809 noa_info
= self
._download
_json
(
810 f
'https:{config.find(".//url_program_noa").text}'.format(area
=data
.find('areakey').text
),
811 station
, note
=f
'Downloading {area} station metadata', fatal
=False)
812 present_info
= traverse_obj(noa_info
, ('nowonair_list', self
._NOA
_STATION
_IDS
.get(station
), 'present'))
815 'title': ' '.join(traverse_obj(present_info
, (('service', 'area',), 'name', {str}
))),
816 'id': join_nonempty(station
, area
),
817 'thumbnails': traverse_obj(present_info
, ('service', 'images', ..., {
819 'width': ('width', {int_or_none}
),
820 'height': ('height', {int_or_none}
),
822 'formats': self
._extract
_m
3u8_formats
(data
.find(f
'{station}hls').text
, station
),