5 from .common
import InfoExtractor
class RadikoBaseIE(InfoExtractor):
    """Shared authentication and stream-discovery logic for the radiko.jp extractors."""

    _GEO_BYPASS = False
    # Player key cached by _extract_full_key() after the first successful lookup
    _FULL_KEY = None
    # Playlist hosts whose time-free formats are marked "not preferred" in _extract_formats()
    _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = (
        'https://c-rpaa.smartstream.ne.jp',
        'https://si-c-radiko.smartstream.ne.jp',
        'https://tf-f-rpaa-radiko.smartstream.ne.jp',
        'https://tf-c-rpaa-radiko.smartstream.ne.jp',
        'https://si-f-radiko.smartstream.ne.jp',
        'https://rpaa.smartstream.ne.jp',
    )
    _HOSTS_FOR_TIME_FREE_FFMPEG_SUPPORTED = (
        'https://rd-wowza-radiko.radiko-cf.com',
        'https://radiko.jp',
        'https://f-radiko.smartstream.ne.jp',
    )
    # The following host always connects to the live stream, never to time-free playback
    _HOSTS_FOR_LIVE = (
        'https://c-radiko.smartstream.ne.jp',
    )

    def _negotiate_token(self):
        """Run the auth1/auth2 handshake and cache the result.

        Returns an ``(auth_token, area_id)`` tuple. Raises a geo-restriction
        error when the service reports the area as ``OUT``.
        """
        _, auth1_handle = self._download_webpage_handle(
            'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
            headers={
                'x-radiko-app': 'pc_html5',
                'x-radiko-app-version': '0.0.1',
                'x-radiko-device': 'pc',
                'x-radiko-user': 'dummy_user',
            })
        auth1_header = auth1_handle.headers

        auth_token = auth1_header['X-Radiko-AuthToken']
        key_length = int(auth1_header['X-Radiko-KeyLength'])
        key_offset = int(auth1_header['X-Radiko-KeyOffset'])
        # auth2 expects the base64 form of the key slice that auth1 asked for
        raw_partial_key = self._extract_full_key()[key_offset:key_offset + key_length]
        partial_key = base64.b64encode(raw_partial_key).decode()

        # Only the first comma-separated field of the auth2 response is used (the area id)
        area_id = self._download_webpage(
            'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
            headers={
                'x-radiko-device': 'pc',
                'x-radiko-user': 'dummy_user',
                'x-radiko-authtoken': auth_token,
                'x-radiko-partialkey': partial_key,
            }).split(',')[0]

        if area_id == 'OUT':
            self.raise_geo_restricted(countries=['JP'])

        auth_data = (auth_token, area_id)
        self.cache.store('radiko', 'auth_data', auth_data)
        return auth_data

    def _auth_client(self):
        """Return ``(auth_token, area_id)``, reusing the cached pair while the server still accepts it."""
        cachedata = self.cache.load('radiko', 'auth_data')
        if cachedata is not None:
            # 401 is an expected answer here, so it must not abort the extraction
            response = self._download_webpage(
                'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401,
                headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]})
            if response == 'OK':
                return cachedata
        return self._negotiate_token()

    def _extract_full_key(self):
        """Return the player key as bytes, downloading the player script at most once per instance."""
        if self._FULL_KEY:
            return self._FULL_KEY

        jscode = self._download_webpage(
            'https://radiko.jp/apps/js/playerCommon.js', None,
            note='Downloading player js code')
        full_key = self._search_regex(
            r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{",
            jscode, 'full key', fatal=False, group='fullkey')

        if full_key:
            full_key = full_key.encode()
        else:  # use only full key ever known
            full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'

        self._FULL_KEY = full_key
        return full_key

    def _find_program(self, video_id, station, cursor):
        """Locate the program of ``station`` that is on air at timestamp ``cursor``.

        Returns ``(prog, station_program, ft, ft_str, to_str)``: the matching
        ``<prog>`` element, the whole weekly schedule document, the parsed start
        timestamp, and the raw ``ft`` / ``to`` attribute strings.
        Raises ExtractorError when no program covers ``cursor``.
        """
        station_program = self._download_xml(
            f'https://radiko.jp/v3/program/station/weekly/{station}.xml', video_id,
            note=f'Downloading radio program for {station} station')

        for prog in station_program.findall('.//prog'):
            ft_str, to_str = prog.attrib['ft'], prog.attrib['to']
            ft = unified_timestamp(ft_str, False)
            to = unified_timestamp(to_str, False)
            if ft is None or to is None:
                # Unparsable schedule entry: it cannot be compared, so it cannot match
                continue
            if ft <= cursor < to:
                # Returning here avoids testing the element's truthiness afterwards;
                # an element without children would evaluate as false
                return prog, station_program, ft, ft_str, to_str

        raise ExtractorError('Cannot identify radio program to download!')

    def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
        """Collect m3u8 formats from every playlist URL advertised for ``station``.

        ``is_onair`` selects the live (``timefree="0"``) or time-free
        (``timefree="1"``) playlist entries. For time-free playback, ``cursor - ft``
        is handed to ffmpeg as a ``-ss`` offset. ``query`` holds extra URL
        parameters merged into each playlist request.
        """
        m3u8_playlist_data = self._download_xml(
            f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id,
            note='Downloading stream information')

        formats = []
        found = set()

        timefree_int = 0 if is_onair else 1
        # None for live streams, so a truthy value implies time-free playback
        time_to_skip = None if is_onair else cursor - ft
        auth_headers = {
            'X-Radiko-AreaId': area_id,
            'X-Radiko-AuthToken': auth_token,
        }

        for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'):
            pcu = element.text
            if pcu in found:
                continue
            found.add(pcu)
            playlist_url = update_url_query(pcu, {
                'station_id': station,
                **query,
                'l': '15',
                'lsid': ''.join(random.choices('0123456789abcdef', k=32)),
                'type': 'b',
            })

            domain = urllib.parse.urlparse(playlist_url).netloc
            subformats = self._extract_m3u8_formats(
                playlist_url, video_id, ext='m4a',
                live=True, fatal=False, m3u8_id=domain,
                note=f'Downloading m3u8 information from {domain}',
                headers=auth_headers)

            # Demote hosts whose live/time-free role does not match the request,
            # and time-free hosts listed as unsupported
            not_preferred = (is_onair ^ pcu.startswith(self._HOSTS_FOR_LIVE)) or (
                not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED))
            for sf in subformats:
                if not_preferred:
                    sf['preference'] = -100
                    sf['format_note'] = 'not preferred'
                if time_to_skip:
                    sf['downloader_options'] = {'ffmpeg_args': ['-ss', str(time_to_skip)]}
            formats.extend(subformats)

        return formats
class RadikoIE(RadikoBaseIE):
    """Extractor for radiko.jp ``#!/ts/<station>/<start time>`` (time-free) URLs."""

    _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'

    _TESTS = [{
        # QRR (Nippon Cultural Broadcasting) station provides <desc>
        'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
        'only_matching': True,
    }, {
        # FMT (TOKYO FM) station does not provide <desc>
        'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
        'only_matching': True,
    }, {
        'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the program that contains the timestamp given in ``url``."""
        # Named lookup keeps this independent of the group order inside _VALID_URL
        station, video_id = self._match_valid_url(url).group('station', 'id')
        vid_int = unified_timestamp(video_id, False)
        prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)

        auth_token, area_id = self._auth_client()

        return {
            'id': video_id,
            # Schedule nodes may be missing, so each lookup tolerates failure
            'title': try_call(lambda: prog.find('title').text),
            'description': clean_html(try_call(lambda: prog.find('info').text)),
            'uploader': try_call(lambda: station_program.find('.//name').text),
            'uploader_id': station,
            'timestamp': vid_int,
            # NOTE(review): time-free programs are flagged as live here too; confirm this is intended
            'is_live': True,
            'formats': self._extract_formats(
                video_id=video_id, station=station, is_onair=False,
                ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
                query={
                    'start_at': radio_begin,
                    'ft': radio_begin,
                    'end_at': radio_end,
                    'to': radio_end,
                    'seek': video_id,
                }),
        }
class RadikoRadioIE(RadikoBaseIE):
    """Extractor for radiko.jp ``#!/live/<station>`` (currently on-air) URLs."""

    _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'

    _TESTS = [{
        # QRR (Nippon Cultural Broadcasting) station provides <desc>
        'url': 'https://radiko.jp/#!/live/QRR',
        'only_matching': True,
    }, {
        # FMT (TOKYO FM) station does not provide <desc>
        'url': 'https://radiko.jp/#!/live/FMT',
        'only_matching': True,
    }, {
        'url': 'https://radiko.jp/#!/live/JOAK-FM',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the program currently on air at the station named in ``url``."""
        station = self._match_id(url)
        self.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')

        auth_token, area_id = self._auth_client()
        # get current time in JST (GMT+9:00 w/o DST)
        vid_now = time_seconds(hours=9)

        prog, station_program, ft, _, _ = self._find_program(station, station, vid_now)

        # A missing schedule node yields None rather than an AttributeError
        title = try_call(lambda: prog.find('title').text)
        description = clean_html(try_call(lambda: prog.find('info').text))
        station_name = try_call(lambda: station_program.find('.//name').text)

        formats = self._extract_formats(
            video_id=station, station=station, is_onair=True,
            ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id,
            query={})

        return {
            'id': station,
            'title': title,
            'description': description,
            'uploader': station_name,
            'uploader_id': station,
            'timestamp': ft,
            'formats': formats,
            'is_live': True,
        }