2 from __future__
import unicode_literals
9 from .common
import InfoExtractor
16 from ..compat
import compat_urllib_parse
# NOTE(review): this listing looks like a lossy extraction of the module: each
# statement carries a stray leading number, expressions are split over several
# lines, indentation is lost, and gaps in the numbers suggest missing statements.
# The comments below describe only what is visible here.
#
# Base class shared by the radiko.jp extractors. It groups the helpers for
# authentication, player-key lookup, programme lookup and format extraction.
19 class RadikoBaseIE(InfoExtractor
):
# Authenticate against the radiko API: call auth1, derive a partial key from
# the response headers, call auth2, then store (auth_token, area_id) in the
# downloader cache under ('radiko', 'auth_data').
# NOTE(review): no return statement is visible; callers unpack the result as
# (auth_token, area_id), so presumably auth_data is returned - confirm.
22 def _auth_client(self
):
# Read any previously stored auth data from the downloader cache.
# NOTE(review): how auth_cache is used afterwards is not visible here.
23 auth_cache
= self
._downloader
.cache
.load('radiko', 'auth_data')
# First step: request auth1; only the response handle is kept.
27 _
, auth1_handle
= self
._download
_webpage
_handle
(
28 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
# NOTE(review): these look like request headers; the enclosing keyword
# argument is not visible in this listing - confirm.
30 'x-radiko-app': 'pc_html5',
31 'x-radiko-app-version': '0.0.1',
32 'x-radiko-device': 'pc',
33 'x-radiko-user': 'dummy_user',
# The auth1 response headers carry the token plus the key length and offset.
35 auth1_header
= auth1_handle
.info()
37 auth_token
= auth1_header
['X-Radiko-AuthToken']
38 kl
= int(auth1_header
['X-Radiko-KeyLength'])
39 ko
= int(auth1_header
['X-Radiko-KeyOffset'])
# The partial key is the slice [ko:ko + kl] of the full player key,
# base64-encoded and decoded to text.
40 raw_partial_key
= self
._extract
_full
_key
()[ko
:ko
+ kl
]
41 partial_key
= base64
.b64encode(raw_partial_key
).decode()
# Second step: request auth2 with the token and partial key.
# NOTE(review): any post-processing of the auth2 response into area_id is not
# visible here.
43 area_id
= self
._download
_webpage
(
44 'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
46 'x-radiko-device': 'pc',
47 'x-radiko-user': 'dummy_user',
48 'x-radiko-authtoken': auth_token
,
49 'x-radiko-partialkey': partial_key
,
# Persist the (auth_token, area_id) pair in the downloader cache.
52 auth_data
= (auth_token
, area_id
)
53 self
._downloader
.cache
.store('radiko', 'auth_data', auth_data
)
# Obtain the full player key as bytes: search the downloaded player JS for
# it and fall back to a hard-coded key; the result is stored on
# self._FULL_KEY.
# NOTE(review): the condition paired with the `else` below and the return
# statement are not visible; _auth_client slices the return value, so the key
# is presumably returned - confirm.
56 def _extract_full_key(self
):
60 jscode
= self
._download
_webpage
(
61 'https://radiko.jp/apps/js/playerCommon.js', None,
62 note
='Downloading player js code')
# Non-fatal search: the named group 'fullkey' is a run of lowercase hex digits
# following the 'pc_html5' argument of a RadikoJSPlayer(...) call.
63 full_key
= self
._search
_regex
(
64 (r
"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\
1,\s
*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"),
65 jscode, 'full key
', fatal=False, group='fullkey
')
# Convert the matched text to bytes, or use the hard-coded key instead.
68 full_key = full_key.encode()
69 else: # use full key ever known
70 full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa
'
72 self._FULL_KEY = full_key
# Download the weekly programme XML of `station` and look for the <prog>
# element whose [ft, to) interval contains `cursor`.
# Returns (prog, station_program, ft, ft_str, to_str); raises ExtractorError
# with 'Cannot identify radio program to download!'.
# NOTE(review): the statements that bind `prog` and leave the loop, and the
# position of the raise relative to the loop, are not visible here.
75 def _find_program(self, video_id, station, cursor):
76 station_program = self._download_xml(
77 'https
://radiko
.jp
/v3
/program
/station
/weekly
/%s.xml
' % station, video_id,
78 note='Downloading radio program
for %s station
' % station)
81 for p in station_program.findall('.//prog
'):
# 'ft' and 'to' attributes are converted with unified_timestamp(..., False).
82 ft_str, to_str = p.attrib['ft
'], p.attrib['to
']
83 ft = unified_timestamp(ft_str, False)
84 to = unified_timestamp(to_str, False)
# Half-open interval check: ft <= cursor < to.
85 if ft <= cursor and cursor < to:
89 raise ExtractorError('Cannot identify radio program to download
!')
91 return prog, station_program, ft, ft_str, to_str
# Build the m3u8 formats for `station` from its pc_html5 stream XML.
# NOTE(review): the initialisation of `found` and `formats`, the use of the
# `query` parameter and the return statement are not visible in this listing.
93 def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
94 m3u8_playlist_data = self._download_xml(
95 'https
://radiko
.jp
/v3
/station
/stream
/pc_html5
/%s.xml
' % station, video_id,
96 note='Downloading m3u8 information
')
97 m3u8_urls = m3u8_playlist_data.findall('.//url
')
101 for url_tag in m3u8_urls:
# Each <url> element provides a playlist_create_url that is extended with
# query parameters (only 'station_id' and 'lsid' are visible here).
102 pcu = url_tag.find('playlist_create_url
')
103 url_attrib = url_tag.attrib
104 playlist_url = update_url_query(pcu.text, {
105 'station_id
': station,
108 'lsid
': '77d0678df93a1034659c14d6fc89f018
',
# `found` tracks playlist URLs already handled.
# NOTE(review): the body of this `if` is not visible; presumably duplicates
# are skipped - confirm.
111 if playlist_url in found:
114 found.add(playlist_url)
# Offset of the cursor from the programme start; None for on-air streams.
116 time_to_skip = None if is_onair else cursor - ft
# Non-fatal m3u8 extraction.
# NOTE(review): the X-Radiko-* entries look like request headers; the
# enclosing keyword argument is not visible.
118 subformats = self._extract_m3u8_formats(
119 playlist_url, video_id, ext='m4a
',
120 live=True, fatal=False, m3u8_id=None,
122 'X
-Radiko
-AreaId
': area_id,
123 'X
-Radiko
-AuthToken
': auth_token,
125 for sf in subformats:
# format_id is set to the host part (netloc) of the format URL.
126 domain = sf['format_id
'] = compat_urllib_parse.urlparse(sf['url
']).netloc
# NOTE(review): indentation is lost, so it is unclear which of the following
# statements are nested under this host check.
127 if re.match(r'^
[cf
]-radiko\
.smartstream\
.ne\
.jp$
', domain):
128 # Prioritize live radio vs playback based on extractor
129 sf['preference
'] = 100 if is_onair else -100
# For non-live requests on a url entry with timefree == '1', pass '-ss' and
# the offset via '_ffmpeg_args'.
# NOTE(review): time_to_skip is the numeric result of cursor - ft, not a
# string; confirm that the consumer of '_ffmpeg_args' accepts that.
130 if not is_onair and url_attrib['timefree
'] == '1' and time_to_skip:
131 sf['_ffmpeg_args
'] = ['-ss
', time_to_skip]
132 formats.extend(subformats)
134 self._sort_formats(formats)
# NOTE(review): this listing looks like a lossy extraction: statements carry a
# stray leading number, expressions are split over several lines, and gaps in
# the numbers suggest missing statements. Comments describe only what is visible.
#
# Extractor for radiko.jp URLs of the form /#!/ts/<station>/<id>, where
# <station> is made of uppercase letters, digits and '-' and <id> is digits.
138 class RadikoIE(RadikoBaseIE):
139 _VALID_URL = r'https?
://(?
:www\
.)?radiko\
.jp
/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
# Test entries, all marked only_matching.
# NOTE(review): the enclosing list and dict delimiters are not visible here.
142 # QRR (文化放送) station provides <desc>
143 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
144 'only_matching': True,
146 # FMT (TOKYO FM) station does not provide <desc>
147 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
148 'only_matching': True,
150 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
151 'only_matching': True,
# Resolve the programme for the station and id taken from the URL, and
# extract its formats with is_onair=False.
154 def _real_extract(self
, url
):
# The groups of _VALID_URL are, in order, the station and the id.
155 station
, video_id
= self
._match
_valid
_url
(url
).groups()
# The id is converted with unified_timestamp(..., False) and used as the
# cursor for the programme lookup.
156 vid_int
= unified_timestamp(video_id
, False)
158 auth_token
, area_id
= self
._auth
_client
()
# The fourth and fifth values returned by _find_program are the raw 'ft' and
# 'to' attribute strings of the matched programme.
160 prog
, station_program
, ft
, radio_begin
, radio_end
= self
._find
_program
(video_id
, station
, vid_int
)
# Metadata comes from the programme's <title> and <info> elements and the
# first <name> found in the station document.
162 title
= prog
.find('title').text
163 description
= clean_html(prog
.find('info').text
)
164 station_name
= station_program
.find('.//name').text
166 formats
= self
._extract
_formats
(
167 video_id
=video_id
, station
=station
, is_onair
=False,
168 ft
=ft
, cursor
=vid_int
, auth_token
=auth_token
, area_id
=area_id
,
# NOTE(review): presumably part of the `query` argument; its remaining entries
# are not visible in this listing.
170 'start_at': radio_begin
,
# NOTE(review): presumably entries of the returned info dict; the return
# statement and the other keys are not visible.
180 'description': description
,
181 'uploader': station_name
,
182 'uploader_id': station
,
183 'timestamp': vid_int
,
# NOTE(review): this listing looks like a lossy extraction: statements carry a
# stray leading number, expressions are split over several lines, and gaps in
# the numbers suggest missing statements. The final dict is also cut off at the
# end of the visible text. Comments describe only what is visible.
#
# Extractor for radiko.jp live URLs of the form /#!/live/<id>, where <id> is
# made of uppercase letters, digits and '-'.
189 class RadikoRadioIE(RadikoBaseIE
):
190 _VALID_URL
= r
'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'
# Test entries, all marked only_matching.
# NOTE(review): the enclosing list and dict delimiters are not visible here.
193 # QRR (文化放送) station provides <desc>
194 'url': 'https://radiko.jp/#!/live/QRR',
195 'only_matching': True,
197 # FMT (TOKYO FM) station does not provide <desc>
198 'url': 'https://radiko.jp/#!/live/FMT',
199 'only_matching': True,
201 'url': 'https://radiko.jp/#!/live/JOAK-FM',
202 'only_matching': True,
# Resolve the programme that is on air now for the station in the URL, and
# extract its formats with is_onair=True.
205 def _real_extract(self
, url
):
# The matched id is the station identifier.
206 station
= self
._match
_id
(url
)
207 self
.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')
209 auth_token
, area_id
= self
._auth
_client
()
210 # get current time in JST (GMT+9:00 w/o DST)
211 vid_now
= datetime
.datetime
.now(datetime
.timezone(datetime
.timedelta(hours
=9)))
# timegm() reads the UTC+9 wall-clock fields of the timetuple as if they were
# UTC, so the result is not the real epoch time.
# NOTE(review): presumably this matches how the 'ft'/'to' strings are
# converted in _find_program - confirm.
212 vid_now
= calendar
.timegm(vid_now
.timetuple())
# The station id is passed as both video_id and station; the two raw time
# strings returned by _find_program are discarded.
214 prog
, station_program
, ft
, _
, _
= self
._find
_program
(station
, station
, vid_now
)
# Metadata comes from the programme's <title> and <info> elements and the
# first <name> found in the station document.
216 title
= prog
.find('title').text
217 description
= clean_html(prog
.find('info').text
)
218 station_name
= station_program
.find('.//name').text
220 formats
= self
._extract
_formats
(
221 video_id
=station
, station
=station
, is_onair
=True,
222 ft
=ft
, cursor
=vid_now
, auth_token
=auth_token
, area_id
=area_id
,
# NOTE(review): presumably entries of the returned info dict; the return
# statement, the other keys and the closing delimiters are not visible.
228 'description': description
,
229 'uploader': station_name
,
230 'uploader_id': station
,