4 from .common
import InfoExtractor
class RadikoBaseIE(InfoExtractor):
    """Shared authentication, programme lookup and format extraction for radiko.jp.

    NOTE(review): this class was rebuilt from a damaged copy in which statements
    were split across lines and some lines were missing. Pieces that could not be
    derived from the surviving text are individually marked NOTE(review).
    """

    # Memoised player key; written by _extract_full_key().
    _FULL_KEY = None

    # Time-free formats whose playlist URL starts with one of these hosts are
    # marked "not preferred" by _extract_formats().
    _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = (
        'https://c-rpaa.smartstream.ne.jp',
        'https://si-c-radiko.smartstream.ne.jp',
        'https://tf-f-rpaa-radiko.smartstream.ne.jp',
        'https://tf-c-rpaa-radiko.smartstream.ne.jp',
        'https://si-f-radiko.smartstream.ne.jp',
        'https://rpaa.smartstream.ne.jp',
    )
    _HOSTS_FOR_TIME_FREE_FFMPEG_SUPPORTED = (
        'https://rd-wowza-radiko.radiko-cf.com',
        # NOTE(review): one entry was missing between the two surviving ones;
        # 'https://radiko.jp' is believed to be the lost value - confirm.
        'https://radiko.jp',
        'https://f-radiko.smartstream.ne.jp',
    )
    # Following URL forcibly connects not Time Free but Live
    _HOSTS_FOR_LIVE = (
        'https://c-radiko.smartstream.ne.jp',
    )

    def _auth_client(self):
        """Run the auth1/auth2 handshake and cache the result.

        Returns:
            An ``(auth_token, area_id)`` tuple, which is also stored in the
            extractor cache under ``('radiko', 'auth_data')``.
        """
        _, auth1_handle = self._download_webpage_handle(
            'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
            headers={
                'x-radiko-app': 'pc_html5',
                'x-radiko-app-version': '0.0.1',
                'x-radiko-device': 'pc',
                'x-radiko-user': 'dummy_user',
            })
        auth1_header = auth1_handle.headers

        auth_token = auth1_header['X-Radiko-AuthToken']
        kl = int(auth1_header['X-Radiko-KeyLength'])
        ko = int(auth1_header['X-Radiko-KeyOffset'])
        # The partial key is the slice of the full key selected by the
        # offset/length announced in the auth1 response, base64-encoded.
        raw_partial_key = self._extract_full_key()[ko:ko + kl]
        partial_key = base64.b64encode(raw_partial_key).decode()

        # NOTE(review): the tail of this call was lost; taking the first
        # comma-separated field of the auth2 body as the area id is restored
        # from recollection of the upstream extractor - confirm.
        area_id = self._download_webpage(
            'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
            headers={
                'x-radiko-device': 'pc',
                'x-radiko-user': 'dummy_user',
                'x-radiko-authtoken': auth_token,
                'x-radiko-partialkey': partial_key,
            }).split(',')[0]

        auth_data = (auth_token, area_id)
        self.cache.store('radiko', 'auth_data', auth_data)
        # Callers unpack the result as ``auth_token, area_id``.
        return auth_data

    def _extract_full_key(self):
        """Return the player's full key as ``bytes``, memoised in ``_FULL_KEY``.

        The key is scraped from playerCommon.js; if the pattern no longer
        matches, the last known key is used instead.
        """
        if self._FULL_KEY:
            return self._FULL_KEY

        jscode = self._download_webpage(
            'https://radiko.jp/apps/js/playerCommon.js', None,
            note='Downloading player js code')
        full_key = self._search_regex(
            r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{",
            jscode, 'full key', fatal=False, group='fullkey')

        if full_key:
            full_key = full_key.encode()
        else:  # fall back to the last full key known to work
            full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'

        self._FULL_KEY = full_key
        return full_key

    def _find_program(self, video_id, station, cursor):
        """Locate the programme of ``station`` that is on air at ``cursor``.

        Args:
            video_id: Identifier used for the download progress messages.
            station: Station id used to build the weekly programme URL.
            cursor: Timestamp compared against each programme's start/end.

        Returns:
            ``(prog, station_program, ft, ft_str, to_str)``: the matching
            ``<prog>`` element, the whole programme document, the parsed start
            timestamp and the raw ``ft``/``to`` attribute strings.

        Raises:
            ExtractorError: If no programme spans ``cursor``.
        """
        station_program = self._download_xml(
            'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id,
            note='Downloading radio program for %s station' % station)

        for p in station_program.findall('.//prog'):
            ft_str, to_str = p.attrib['ft'], p.attrib['to']
            ft = unified_timestamp(ft_str, False)
            to = unified_timestamp(to_str, False)
            if ft <= cursor < to:
                # Returning here avoids a "found" sentinel and any truth test
                # on an XML element.
                return p, station_program, ft, ft_str, to_str

        raise ExtractorError('Cannot identify radio program to download!')

    def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
        """Collect m3u8 formats from every stream URL advertised for ``station``.

        Args:
            video_id: Identifier used for the download progress messages.
            station: Station id.
            is_onair: True for a live stream, False for time-free playback.
            ft: Start timestamp of the programme.
            cursor: Requested position; ``cursor - ft`` is the seek offset.
            auth_token: Sent as the ``X-Radiko-AuthToken`` header.
            area_id: Sent as the ``X-Radiko-AreaId`` header.
            query: Extra query parameters merged into each playlist URL.

        Returns:
            A list of format dicts (possibly empty; playlist fetches are
            non-fatal).
        """
        m3u8_playlist_data = self._download_xml(
            f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id,
            note='Downloading stream information')

        # Identical for every URL, so computed once outside the loop.
        time_to_skip = None if is_onair else cursor - ft

        formats = []
        found = set()
        for url_tag in m3u8_playlist_data.findall('.//url'):
            pcu = url_tag.find('playlist_create_url').text
            url_attrib = url_tag.attrib
            playlist_url = update_url_query(pcu, {
                'station_id': station,
                **query,
                # NOTE(review): 'l' and 'type' were missing from the damaged
                # copy and are restored from recollection of the upstream
                # extractor - confirm.
                'l': '15',
                'lsid': '88ecea37e968c1f17d5413312d9f8003',
                'type': 'b',
            })
            if playlist_url in found:
                continue
            found.add(playlist_url)

            domain = urllib.parse.urlparse(playlist_url).netloc
            subformats = self._extract_m3u8_formats(
                playlist_url, video_id, ext='m4a',
                live=True, fatal=False, m3u8_id=domain,
                note=f'Downloading m3u8 information from {domain}',
                headers={
                    'X-Radiko-AreaId': area_id,
                    'X-Radiko-AuthToken': auth_token,
                })
            for sf in subformats:
                # Demote formats whose host kind (live vs. time-free) does not
                # match the request, and time-free hosts listed as unsupported.
                if (is_onair ^ pcu.startswith(self._HOSTS_FOR_LIVE)) or (
                        not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)):
                    sf['preference'] = -100
                    sf['format_note'] = 'not preferred'
                if not is_onair and url_attrib.get('timefree') == '1' and time_to_skip:
                    # Command-line arguments must be strings, not numbers.
                    sf['downloader_options'] = {'ffmpeg_args': ['-ss', str(time_to_skip)]}
            formats.extend(subformats)

        return formats
class RadikoIE(RadikoBaseIE):
    """Extractor for radiko.jp time-shift (``#!/ts/<station>/<timestamp>``) URLs.

    NOTE(review): this class was rebuilt from a damaged copy in which statements
    were split across lines and some lines were missing. Pieces that could not be
    derived from the surviving text are individually marked NOTE(review).
    """

    _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
    _TESTS = [{
        # QRR (文化放送, Nippon Cultural Broadcasting) station provides <desc>
        'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
        'only_matching': True,
    }, {
        # FMT (TOKYO FM) station does not provide <desc>
        'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
        'only_matching': True,
    }, {
        'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one time-shifted programme.

        The cached auth data is tried first; if it yields no formats, a fresh
        authentication is performed and extraction is retried once.
        """
        station, video_id = self._match_valid_url(url).groups()
        vid_int = unified_timestamp(video_id, False)
        prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)

        auth_cache = self.cache.load('radiko', 'auth_data')
        for attempt in range(2):
            # First pass: reuse cached credentials when there are any.
            # Second pass (or empty cache): authenticate from scratch.
            auth_token, area_id = (not attempt and auth_cache) or self._auth_client()
            formats = self._extract_formats(
                video_id=video_id, station=station, is_onair=False,
                ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
                query={
                    'start_at': radio_begin,
                    # NOTE(review): only 'start_at' survived in the damaged
                    # copy; the four entries below are restored from
                    # recollection of the upstream extractor - confirm.
                    'ft': radio_begin,
                    'end_at': radio_end,
                    'to': radio_end,
                    'seek': video_id,
                })
            if formats:
                break

        return {
            'id': video_id,
            'title': try_call(lambda: prog.find('title').text),
            'description': clean_html(try_call(lambda: prog.find('info').text)),
            'uploader': try_call(lambda: station_program.find('.//name').text),
            'uploader_id': station,
            'timestamp': vid_int,
            'formats': formats,
            # NOTE(review): restored from recollection of the upstream
            # extractor; this key was not in the damaged copy - confirm.
            'is_live': True,
        }
197 class RadikoRadioIE(RadikoBaseIE
):
198 _VALID_URL
= r
'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'
201 # QRR (文化放送) station provides <desc>
202 'url': 'https://radiko.jp/#!/live/QRR',
203 'only_matching': True,
205 # FMT (TOKYO FM) station does not provide <desc>
206 'url': 'https://radiko.jp/#!/live/FMT',
207 'only_matching': True,
209 'url': 'https://radiko.jp/#!/live/JOAK-FM',
210 'only_matching': True,
213 def _real_extract(self
, url
):
214 station
= self
._match
_id
(url
)
215 self
.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')
217 auth_token
, area_id
= self
._auth
_client
()
218 # get current time in JST (GMT+9:00 w/o DST)
219 vid_now
= time_seconds(hours
=9)
221 prog
, station_program
, ft
, _
, _
= self
._find
_program
(station
, station
, vid_now
)
223 title
= prog
.find('title').text
224 description
= clean_html(prog
.find('info').text
)
225 station_name
= station_program
.find('.//name').text
227 formats
= self
._extract
_formats
(
228 video_id
=station
, station
=station
, is_onair
=True,
229 ft
=ft
, cursor
=vid_now
, auth_token
=auth_token
, area_id
=area_id
,
235 'description': description
,
236 'uploader': station_name
,
237 'uploader_id': station
,