import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    get_element_by_class,
    int_or_none,
    join_nonempty,
    parse_duration,
    traverse_obj,
    unescapeHTML,
    unified_timestamp,
    url_or_none,
    urljoin,
)
class NhkBaseIE(InfoExtractor):
    """Shared logic for NHK World on-demand extractors (video and audio)."""
    _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
    _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
    _TYPE_REGEX = r'/(?P<type>video|audio)/'

    def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
        """Query the NHK World listing API; returns a (possibly empty) list of episode dicts."""
        return self._download_json(
            self._API_URL_TEMPLATE % (
                'v' if is_video else 'r',
                'clip' if is_clip else 'esd',
                'episode' if is_episode else 'program',
                m_id, lang, '/all' if is_video else ''),
            m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []

    def _get_api_info(self, refresh=True):
        """Return the stream API url/token dict, from cache unless refresh is True."""
        if not refresh:
            return self.cache.load('nhk', 'api_info')

        # Clear the cache first so a failed refresh doesn't leave stale data behind.
        self.cache.store('nhk', 'api_info', {})
        movie_player_js = self._download_webpage(
            'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
            note='Downloading stream API information')
        api_info = {
            'url': self._search_regex(
                r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
            'token': self._search_regex(
                r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
        }
        self.cache.store('nhk', 'api_info', api_info)
        return api_info

    def _extract_stream_info(self, vod_id):
        """Resolve formats/subtitles and timing metadata for a video-on-demand ID."""
        # First pass uses the cached API info; second pass forces a refresh.
        for refresh in (False, True):
            api_info = self._get_api_info(refresh)
            if not api_info:
                continue

            api_url = api_info.pop('url')
            meta = traverse_obj(self._download_json(
                api_url, vod_id, 'Downloading stream url info', fatal=False, query={
                    **api_info,
                    'type': 'json',
                    'optional_id': vod_id,
                    'active_flg': 1,
                }), ('meta', 0))
            stream_url = traverse_obj(
                meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)

            if stream_url:
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
                return {
                    **traverse_obj(meta, {
                        'duration': ('duration', {int_or_none}),
                        'timestamp': ('publication_date', {unified_timestamp}),
                        'release_timestamp': ('insert_date', {unified_timestamp}),
                        'modified_timestamp': ('update_date', {unified_timestamp}),
                    }),
                    'formats': formats,
                    'subtitles': subtitles,
                }
        raise ExtractorError('Unable to extract stream url')

    def _extract_episode_info(self, url, episode=None):
        """Build an info dict for a single episode; fetches it from the API when not given."""
        fetch_episode = episode is None
        lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
        is_video = m_type == 'video'

        if is_video:
            # API expects the video ID split as '9999-011' etc.
            episode_id = episode_id[:4] + '-' + episode_id[4:]

        if fetch_episode:
            # IDs with a '9999' prefix are clips, not regular episodes
            episode = self._call_api(
                episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]

        def get_clean_field(key):
            # Prefer the pre-cleaned API field when present
            return clean_html(episode.get(key + '_clean') or episode.get(key))

        title = get_clean_field('sub_title')
        series = get_clean_field('title')

        thumbnails = []
        for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
            img_path = episode.get('image' + s)
            if not img_path:
                continue
            thumbnails.append({
                'id': '%dp' % h,
                'height': h,
                'width': w,
                'url': 'https://www3.nhk.or.jp' + img_path,
            })

        episode_name = title
        if series and title:
            title = f'{series} - {title}'
        elif series and not title:
            # One-off programme: promote the series name to the title
            title = series
            series = None
            episode_name = None
        else:  # title, no series
            series = None

        info = {
            'id': episode_id + '-' + lang,
            'title': title,
            'description': get_clean_field('description'),
            'thumbnails': thumbnails,
            'series': series,
            'episode': episode_name,
        }

        if is_video:
            vod_id = episode['vod_id']
            info.update({
                **self._extract_stream_info(vod_id),
                'id': vod_id,
            })
        else:
            if fetch_episode:
                audio_path = episode['audio']['audio']
                info['formats'] = self._extract_m3u8_formats(
                    'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
                    episode_id, 'm4a', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                for f in info['formats']:
                    f['language'] = lang
            else:
                # Episode was supplied by a playlist extractor: defer to NhkVodIE
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': NhkVodIE.ie_key(),
                    'url': url,
                })
        return info
class NhkVodIE(NhkBaseIE):
    """Extractor for individual NHK World on-demand video/audio episodes."""
    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
    _VALID_URL = [rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>video)/(?P<id>[0-9a-z]+)',
                  rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[0-9a-z]+)']
    # Content available only for a limited period of time. Visit
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
    _TESTS = [{
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
        'info_dict': {
            'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
            'ext': 'mp4',
            'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
            'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
            'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463',
            'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
            'series': 'Japan Railway Journal',
            'modified_timestamp': 1694243656,
            'timestamp': 1681428600,
            'release_timestamp': 1693883728,
            'upload_date': '20230413',
            'modified_date': '20230909',
            'release_date': '20230905',
        },
    }, {
        # clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
        'md5': '153c3016dfd252ba09726588149cf0e7',
        'info_dict': {
            'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
            'ext': 'mp4',
            'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
            'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed',
            'series': 'Dining with the Chef',
            'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
            'upload_date': '20190816',
            'release_date': '20230902',
            'release_timestamp': 1693619292,
            'modified_timestamp': 1694168033,
            'modified_date': '20230908',
            'timestamp': 1565997540,
        },
    }, {
        # radio
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
        'info_dict': {
            'id': 'livinginjapan-20231001-1-en',
            'ext': 'm4a',
            'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
            'series': 'Living in Japan',
            'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
            'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545',
            'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
        },
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
        'only_matching': True,
    }, {
        # video, alphabetic character in ID #29670
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
        'info_dict': {
            'ext': 'mp4',
            'title': 'DESIGN TALKS plus - Fishermen’s Finery',
            'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
            'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
            'upload_date': '20210615',
            'timestamp': 1623722008,
        },
        'skip': '404 Not Found',
    }, {
        # japanese-language, longer id than english
        'url': 'https://www3.nhk.or.jp/nhkworld/ja/ondemand/video/0020271111/',
        'info_dict': {
            'id': 'nw_ja_v_jvod_ohayou_20231008',
            'ext': 'mp4',
            'title': 'おはよう日本(7時台) - 10月8日放送',
            'series': 'おはよう日本(7時台)',
            'episode': '10月8日放送',
            'thumbnail': 'md5:d733b1c8e965ab68fb02b2d347d0e9b4',
            'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
        },
        'skip': 'expires 2023-10-15',
    }, {
        # a one-off (single-episode series). title from the api is just '<p></p>'
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
        'info_dict': {
            'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
            'ext': 'mp4',
            'title': 'Barakan Discovers AMAMI OSHIMA: Isson\'s Treasure Island',
            'description': 'md5:5db620c46a0698451cc59add8816b797',
            'thumbnail': 'md5:67d9ff28009ba379bfa85ad1aaa0e2bd',
            'release_date': '20230905',
            'timestamp': 1690103400,
            'release_timestamp': 1693898699,
            'modified_timestamp': 1698057495,
            'modified_date': '20231023',
            'upload_date': '20230723',
        },
    }]

    def _real_extract(self, url):
        # All per-episode work lives in the shared base class.
        return self._extract_episode_info(url)
class NhkVodProgramIE(NhkBaseIE):
    """Playlist extractor for an NHK World programme (all episodes or clips)."""
    _VALID_URL = rf'{NhkBaseIE._BASE_URL_REGEX}/program{NhkBaseIE._TYPE_REGEX}(?P<id>\w+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?'
    _TESTS = [{
        # video program episodes
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo',
        'info_dict': {
            'id': 'sumo',
            'title': 'GRAND SUMO Highlights',
            'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
        },
        'playlist_mincount': 0,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
        },
        'playlist_mincount': 12,
    }, {
        # video program clips
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
        'only_matching': True,
    }, {
        # audio program
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        lang, m_type, program_id, episode_type = self._match_valid_url(url).group(
            'lang', 'type', 'id', 'episode_type')

        episodes = self._call_api(
            program_id, lang, m_type == 'video', False, episode_type == 'clip')

        entries = []
        for episode in episodes:
            episode_path = episode.get('url')
            if not episode_path:
                continue
            entries.append(self._extract_episode_info(
                urljoin(url, episode_path), episode))

        # Programme title/description are only present in the HTML page, not the API
        html = self._download_webpage(url, program_id)
        program_title = clean_html(get_element_by_class('p-programDetail__title', html))
        program_description = clean_html(get_element_by_class('p-programDetail__text', html))

        return self.playlist_result(entries, program_id, program_title, program_description)
class NhkForSchoolBangumiIE(InfoExtractor):
    """Extractor for NHK for School programme/clip player pages (movie CGI endpoint)."""
    _VALID_URL = r'https?://www2\.nhk\.or\.jp/school/movie/(?P<type>bangumi|clip)\.cgi\?das_id=(?P<id>[a-zA-Z0-9_-]+)'
    _TESTS = [{
        'url': 'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id=D0005150191_00000',
        'info_dict': {
            'id': 'D0005150191_00003',
            'ext': 'mp4',
            'timestamp': 1396414800,
            'upload_date': '20140402',
            'chapters': 'count:12'
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        program_type, video_id = self._match_valid_url(url).groups()

        webpage = self._download_webpage(
            f'https://www2.nhk.or.jp/school/movie/{program_type}.cgi?das_id={video_id}', video_id)

        # searches all variables
        base_values = {g.group(1): g.group(2) for g in re.finditer(r'var\s+([a-zA-Z_]+)\s*=\s*"([^"]+?)";', webpage)}
        # and programObj values too
        program_values = {g.group(1): g.group(3) for g in re.finditer(r'(?:program|clip)Obj\.([a-zA-Z_]+)\s*=\s*(["\'])([^"]+?)\2;', webpage)}
        # extract all chapters
        chapter_durations = [parse_duration(g.group(1)) for g in re.finditer(r'chapterTime\.push\(\'([0-9:]+?)\'\);', webpage)]
        chapter_titles = [' '.join([g.group(1) or '', unescapeHTML(g.group(2))]).strip() for g in re.finditer(r'<div class="cpTitle"><span>(scene\s*\d+)?</span>([^<]+?)</div>', webpage)]

        # this is how player_core.js is actually doing (!)
        version = base_values.get('r_version') or program_values.get('version')
        if version:
            video_id = f'{video_id.split("_")[0]}_{version}'

        formats = self._extract_m3u8_formats(
            f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8',
            video_id, ext='mp4', m3u8_id='hls')

        duration = parse_duration(base_values.get('r_duration'))

        chapters = None
        if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
            # Each chapter runs until the next one starts; the last runs to the end.
            start_time = chapter_durations
            end_time = chapter_durations[1:] + [duration]
            chapters = [{
                'start_time': s,
                'end_time': e,
                'title': t,
            } for s, e, t in zip(start_time, end_time, chapter_titles)]

        return {
            'id': video_id,
            'title': program_values.get('name'),
            'duration': parse_duration(base_values.get('r_duration')),
            'timestamp': unified_timestamp(base_values['r_upload']),
            'formats': formats,
            'chapters': chapters,
        }
class NhkForSchoolSubjectIE(InfoExtractor):
    IE_DESC = 'Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)'
    KNOWN_SUBJECTS = (
        'rika', 'syakai', 'kokugo',
        'sansuu', 'seikatsu', 'doutoku',
        'ongaku', 'taiiku', 'zukou',
        'gijutsu', 'katei', 'sougou',
        'eigo', 'tokkatsu',
        'tokushi', 'sonota',
    )
    _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>%s)/?(?:[\?#].*)?$' % '|'.join(re.escape(s) for s in KNOWN_SUBJECTS)

    _TESTS = [{
        'url': 'https://www.nhk.or.jp/school/sougou/',
        'info_dict': {
            'id': 'sougou',
            'title': '総合的な学習の時間',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://www.nhk.or.jp/school/rika/',
        'info_dict': {
            'id': 'rika',
            'title': '理科',
        },
        'playlist_mincount': 15,
    }]

    def _real_extract(self, url):
        subject_id = self._match_id(url)
        webpage = self._download_webpage(url, subject_id)

        # Each programme of the subject is linked as /school/<subject>/<program>/
        return self.playlist_from_matches(
            re.finditer(rf'href="((?:https?://www\.nhk\.or\.jp)?/school/{re.escape(subject_id)}/[^/]+/)"', webpage),
            subject_id,
            self._html_search_regex(r'(?s)<span\s+class="subjectName">\s*<img\s*[^<]+>\s*([^<]+?)</span>', webpage, 'title', fatal=False),
            lambda g: urljoin(url, g.group(1)))
class NhkForSchoolProgramListIE(InfoExtractor):
    """Playlist extractor for an NHK for School programme page (all its bangumi)."""
    _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>(?:%s)/[a-zA-Z0-9_-]+)' % (
        '|'.join(re.escape(s) for s in NhkForSchoolSubjectIE.KNOWN_SUBJECTS))
    _TESTS = [{
        'url': 'https://www.nhk.or.jp/school/sougou/q/',
        'info_dict': {
            'id': 'sougou/q',
            'title': 'Q~こどものための哲学',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        program_id = self._match_id(url)

        webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)

        title = (self._generic_title('', webpage)
                 or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
        # Strip the site-wide " | NHK for School" suffix from the page title
        title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
        description = self._html_search_regex(
            r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',
            webpage, 'description', fatal=False, group=0)

        bangumi_list = self._download_json(
            f'https://www.nhk.or.jp/school/{program_id}/meta/program.json', program_id)
        # they're always bangumi
        bangumis = [
            self.url_result(f'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id={x}')
            for x in traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []]

        return self.playlist_result(bangumis, program_id, title, description)
class NhkRadiruIE(InfoExtractor):
    _GEO_COUNTRIES = ['JP']
    IE_DESC = 'NHK らじる (Radiru/Rajiru)'
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
    _TESTS = [{
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
        'skip': 'Episode expired on 2023-04-16',
        'info_dict': {
            'channel': 'NHK-FM',
            'uploader': 'NHK-FM',
            'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
            'ext': 'm4a',
            'id': '0449_01_3853544',
            'series': 'ジャズ・トゥナイト',
            'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
            'timestamp': 1680969600,
            'title': 'ジャズ・トゥナイト NEWジャズ特集',
            'upload_date': '20230408',
            'release_timestamp': 1680962400,
            'release_date': '20230408',
        },
    }, {
        # playlist, airs every weekday so it should _hopefully_ be okay forever
        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
        'info_dict': {
            'id': '0458_01',
            'title': 'ベストオブクラシック',
            'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
            'channel': 'NHK-FM',
            'uploader': 'NHK-FM',
            'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
        },
        'playlist_mincount': 3,
    }, {
        # one with letters in the id
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
        'note': 'Expires on 2024-03-31',
        'info_dict': {
            'id': 'F300_06_3738470',
            'ext': 'm4a',
            'title': '有島武郎「一房のぶどう」',
            'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)',
            'channel': 'NHKラジオ第1、NHK-FM',
            'uploader': 'NHKラジオ第1、NHK-FM',
            'timestamp': 1635757200,
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
            'release_date': '20161207',
            'series': 'らじる文庫 by ラジオ深夜便 ',
            'release_timestamp': 1481126700,
            'upload_date': '20211101',
        },
    }, {
        # news
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
        'skip': 'Expires on 2023-04-17',
        'info_dict': {
            'id': 'F261_01_3855109',
            'ext': 'm4a',
            'channel': 'NHKラジオ第1',
            'uploader': 'NHKラジオ第1',
            'timestamp': 1681635900,
            'release_date': '20230416',
            'series': 'NHKラジオニュース',
            'title': '午後6時のNHKニュース',
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
            'upload_date': '20230416',
            'release_timestamp': 1681635600,
        },
    }]

    def _extract_episode_info(self, headline, programme_id, series_meta):
        """Build an info dict for one headline entry of a programme's JSON feed."""
        episode_id = f'{programme_id}_{headline["headline_id"]}'
        episode = traverse_obj(headline, ('file_list', 0, {dict}))

        return {
            **series_meta,
            'id': episode_id,
            'formats': self._extract_m3u8_formats(episode.get('file_name'), episode_id, fatal=False),
            'container': 'm4a_dash',  # force fixup, AAC-only HLS
            'series': series_meta.get('title'),
            'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
            **traverse_obj(episode, {
                'title': 'file_title',
                'description': 'file_title_sub',
                'timestamp': ('open_time', {unified_timestamp}),
                # aa_vinfo4 looks like '<date>_<date>'; the first part is the broadcast date
                'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
            }),
        }

    def _real_extract(self, url):
        site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
        programme_id = f'{site_id}_{corner_id}'

        # F261 (radio news) has its own dedicated feed
        if site_id == 'F261':
            json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
        else:
            json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'

        meta = self._download_json(json_url, programme_id)['main']

        series_meta = traverse_obj(meta, {
            'title': 'program_name',
            'channel': 'media_name',
            'uploader': 'media_name',
            'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
        }, get_all=False)

        if headline_id:
            # Single-episode URL: pick the matching headline out of the list
            return self._extract_episode_info(
                traverse_obj(meta, (
                    'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False),
                programme_id, series_meta)

        def entries():
            for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
                yield self._extract_episode_info(headline, programme_id, series_meta)

        return self.playlist_result(
            entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta)
class NhkRadioNewsPageIE(InfoExtractor):
    """The radionews landing page is just an alias for the F261_01 Radiru playlist."""
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
    _TESTS = [{
        # airs daily, on-the-hour most hours
        'url': 'https://www.nhk.or.jp/radionews/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'F261_01',
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
            'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
            'channel': 'NHKラジオ第1',
            'uploader': 'NHKラジオ第1',
            'title': 'NHKラジオニュース',
        },
    }]

    def _real_extract(self, url):
        # Delegate straight to the NHK radio news programme playlist
        return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
614 class NhkRadiruLiveIE(InfoExtractor
):
615 _GEO_COUNTRIES
= ['JP']
616 _VALID_URL
= r
'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
618 # radio 1, no area specified
619 'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
622 'title': 're:^NHKネットラジオ第1 東京.+$',
624 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
625 'live_status': 'is_live',
628 # radio 2, area specified
629 # (the area doesnt actually matter, r2 is national)
630 'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
631 'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}
}},
634 'title': 're:^NHKネットラジオ第2 福岡.+$',
636 'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
637 'live_status': 'is_live',
641 'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
642 'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}
}},
645 'title': 're:^NHKネットラジオFM 札幌.+$',
647 'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
648 'live_status': 'is_live',
652 _NOA_STATION_IDS
= {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
654 def _real_extract(self
, url
):
655 station
= self
._match
_id
(url
)
656 area
= self
._configuration
_arg
('area', ['tokyo'])[0]
658 config
= self
._download
_xml
(
659 'https://www.nhk.or.jp/radio/config/config_web.xml', station
, 'Downloading area information')
660 data
= config
.find(f
'.//data//area[.="{area}"]/..')
663 raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join(
664 [i
.text
for i
in config
.findall('.//data//area')]), expected
=True)
666 noa_info
= self
._download
_json
(
667 f
'https:{config.find(".//url_program_noa").text}'.format(area
=data
.find('areakey').text
),
668 station
, note
=f
'Downloading {area} station metadata', fatal
=False)
669 present_info
= traverse_obj(noa_info
, ('nowonair_list', self
._NOA
_STATION
_IDS
.get(station
), 'present'))
672 'title': ' '.join(traverse_obj(present_info
, (('service', 'area',), 'name', {str}
))),
673 'id': join_nonempty(station
, area
),
674 'thumbnails': traverse_obj(present_info
, ('service', 'images', ..., {
676 'width': ('width', {int_or_none}
),
677 'height': ('height', {int_or_none}
),
679 'formats': self
._extract
_m
3u8_formats
(data
.find(f
'{station}hls').text
, station
),