3 from .common
import InfoExtractor
4 from ..utils
import traverse_obj
, try_call
, InAdvancePagedList
class XimalayaBaseIE(InfoExtractor):
    """Common base class shared by the Ximalaya extractors in this module."""

    # Country code list; presumably consumed by InfoExtractor's
    # geo-restriction handling -- confirm against the base class.
    _GEO_COUNTRIES = ['CN']
11 class XimalayaIE(XimalayaBaseIE
):
14 _VALID_URL
= r
'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
17 'url': 'http://www.ximalaya.com/sound/47740352/',
22 'uploader_id': 61425525,
23 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
24 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
25 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
26 'thumbnail': r
're:^https?://.*\.jpg',
30 'url': r
're:^https?://.*\.jpg',
33 'name': 'cover_url_142',
34 'url': r
're:^https?://.*\.jpg',
46 'url': 'http://m.ximalaya.com/61425525/sound/47740352/',
51 'uploader_id': 61425525,
52 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
53 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
54 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
55 'thumbnail': r
're:^https?://.*\.jpg',
59 'url': r
're:^https?://.*\.jpg',
62 'name': 'cover_url_142',
63 'url': r
're:^https?://.*\.jpg',
76 def _real_extract(self
, url
):
77 scheme
= 'https' if url
.startswith('https') else 'http'
79 audio_id
= self
._match
_id
(url
)
80 audio_info_file
= '%s://m.ximalaya.com/tracks/%s.json' % (scheme
, audio_id
)
81 audio_info
= self
._download
_json
(audio_info_file
, audio_id
,
82 'Downloading info json %s' % audio_info_file
,
83 'Unable to download info file')
86 'format_id': f
'{bps}k',
90 } for bps
, k
in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info
.get(k
)]
93 for k
in audio_info
.keys():
94 # cover picture keys look like: 'cover_url', 'cover_url_142'
95 if k
.startswith('cover_url'):
96 thumbnail
= {'name': k, 'url': audio_info[k]}
97 if k
== 'cover_url_142':
98 thumbnail
['width'] = 180
99 thumbnail
['height'] = 180
100 thumbnails
.append(thumbnail
)
102 audio_uploader_id
= audio_info
.get('uid')
104 audio_description
= try_call(
105 lambda: audio_info
['intro'].replace('\r\n\r\n\r\n ', '\n').replace('\r\n', '\n'))
109 'uploader': audio_info
.get('nickname'),
110 'uploader_id': audio_uploader_id
,
111 'uploader_url': f
'{scheme}://www.ximalaya.com/zhubo/{audio_uploader_id}/' if audio_uploader_id
else None,
112 'title': audio_info
['title'],
113 'thumbnails': thumbnails
,
114 'description': audio_description
,
115 'categories': list(filter(None, [audio_info
.get('category_name')])),
116 'duration': audio_info
.get('duration'),
117 'view_count': audio_info
.get('play_count'),
118 'like_count': audio_info
.get('favorites_count'),
123 class XimalayaAlbumIE(XimalayaBaseIE
):
124 IE_NAME
= 'ximalaya:album'
125 IE_DESC
= '喜马拉雅FM 专辑'
126 _VALID_URL
= r
'https?://(?:www\.|m\.)?ximalaya\.com/(?:\d+/)?album/(?P<id>[0-9]+)'
128 'url': 'http://www.ximalaya.com/61425525/album/5534601/',
130 'title': '唐诗三百首(含赏析)',
133 'playlist_mincount': 323,
135 'url': 'https://www.ximalaya.com/album/6912905',
137 'title': '埃克哈特《修炼当下的力量》',
140 'playlist_mincount': 41,
def _real_extract(self, url):
    """Build a playlist result for a Ximalaya album URL.

    The first tracks-list page is downloaded eagerly: it supplies the
    track total and page size needed to compute the page count, and the
    album title (read from its first track). Remaining pages are fetched
    through the paged list's page callback.
    """
    album_id = self._match_id(url)

    initial_page = self._fetch_page(album_id, 1)
    track_total = initial_page['trackTotalCount']
    total_pages = math.ceil(track_total / initial_page['pageSize'])

    def entries_for(idx):
        # Index 0 reuses the page that is already downloaded; any other
        # index is shifted by one to form the page number that is requested.
        if idx:
            return self._get_entries(self._fetch_page(album_id, idx + 1))
        return self._get_entries(initial_page)

    paged_entries = InAdvancePagedList(
        entries_for, total_pages, initial_page['pageSize'])

    album_title = traverse_obj(
        initial_page, ('tracks', 0, 'albumTitle'), expected_type=str)

    return self.playlist_result(paged_entries, album_id, album_title)
def _fetch_page(self, playlist_id, page_idx):
    """Download one page of an album's tracks list.

    Requests the getTracksList endpoint with ``albumId`` and ``pageNum``
    query parameters and returns the ``data`` member of the JSON response.
    """
    page_query = {'albumId': playlist_id, 'pageNum': page_idx}
    response = self._download_json(
        'https://www.ximalaya.com/revision/album/v1/getTracksList',
        playlist_id,
        note=f'Downloading tracks list page {page_idx}',
        query=page_query)
    return response['data']
def _get_entries(self, page_data):
    """Yield one url_result per item in ``page_data['tracks']``.

    Each result points at the track's page on www.ximalaya.com, is tagged
    for XimalayaIE, and carries the track's ``trackId`` and ``title`` when
    those keys are present.
    """
    for track in page_data['tracks']:
        # track['url'] is appended directly after the host name.
        track_path = track['url']
        page_url = self._proto_relative_url(f'//www.ximalaya.com{track_path}')
        yield self.url_result(
            page_url, XimalayaIE, track.get('trackId'), track.get('title'))