]>
Commit | Line | Data |
---|---|---|
dd634acd | 1 | import math |
a90641fe | 2 | |
3 | from .common import InfoExtractor | |
f4f9f6d0 | 4 | from ..utils import InAdvancePagedList, str_or_none, traverse_obj, try_call |
a90641fe | 5 | |
6 | ||
7 | class XimalayaBaseIE(InfoExtractor): | |
8 | _GEO_COUNTRIES = ['CN'] | |
9 | ||
10 | ||
11 | class XimalayaIE(XimalayaBaseIE): | |
12 | IE_NAME = 'ximalaya' | |
13 | IE_DESC = '喜马拉雅FM' | |
dd634acd | 14 | _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)' |
a90641fe | 15 | _TESTS = [ |
16 | { | |
dd634acd | 17 | 'url': 'http://www.ximalaya.com/sound/47740352/', |
a90641fe | 18 | 'info_dict': { |
19 | 'id': '47740352', | |
20 | 'ext': 'm4a', | |
21 | 'uploader': '小彬彬爱听书', | |
f4f9f6d0 | 22 | 'uploader_id': '61425525', |
a90641fe | 23 | 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/', |
24 | 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白', | |
add96eb9 | 25 | 'description': 'contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。', |
dd634acd | 26 | 'thumbnail': r're:^https?://.*\.jpg', |
a90641fe | 27 | 'thumbnails': [ |
28 | { | |
29 | 'name': 'cover_url', | |
dd634acd | 30 | 'url': r're:^https?://.*\.jpg', |
a90641fe | 31 | }, |
32 | { | |
33 | 'name': 'cover_url_142', | |
dd634acd | 34 | 'url': r're:^https?://.*\.jpg', |
a90641fe | 35 | 'width': 180, |
add96eb9 | 36 | 'height': 180, |
37 | }, | |
a90641fe | 38 | ], |
417cdaae | 39 | 'categories': ['其他'], |
a90641fe | 40 | 'duration': 93, |
41 | 'view_count': int, | |
42 | 'like_count': int, | |
add96eb9 | 43 | }, |
a90641fe | 44 | }, |
45 | { | |
46 | 'url': 'http://m.ximalaya.com/61425525/sound/47740352/', | |
47 | 'info_dict': { | |
48 | 'id': '47740352', | |
49 | 'ext': 'm4a', | |
50 | 'uploader': '小彬彬爱听书', | |
f4f9f6d0 | 51 | 'uploader_id': '61425525', |
a90641fe | 52 | 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/', |
53 | 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白', | |
add96eb9 | 54 | 'description': 'contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。', |
dd634acd | 55 | 'thumbnail': r're:^https?://.*\.jpg', |
a90641fe | 56 | 'thumbnails': [ |
57 | { | |
58 | 'name': 'cover_url', | |
dd634acd | 59 | 'url': r're:^https?://.*\.jpg', |
a90641fe | 60 | }, |
61 | { | |
62 | 'name': 'cover_url_142', | |
dd634acd | 63 | 'url': r're:^https?://.*\.jpg', |
a90641fe | 64 | 'width': 180, |
add96eb9 | 65 | 'height': 180, |
66 | }, | |
a90641fe | 67 | ], |
dd634acd | 68 | 'categories': ['人文'], |
a90641fe | 69 | 'duration': 93, |
70 | 'view_count': int, | |
71 | 'like_count': int, | |
add96eb9 | 72 | }, |
73 | }, | |
a90641fe | 74 | ] |
75 | ||
76 | def _real_extract(self, url): | |
a90641fe | 77 | scheme = 'https' if url.startswith('https') else 'http' |
78 | ||
79 | audio_id = self._match_id(url) | |
add96eb9 | 80 | audio_info_file = f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json' |
81 | audio_info = self._download_json( | |
82 | audio_info_file, audio_id, | |
83 | f'Downloading info json {audio_info_file}', 'Unable to download info file') | |
a90641fe | 84 | |
dd634acd L |
85 | formats = [{ |
86 | 'format_id': f'{bps}k', | |
87 | 'url': audio_info[k], | |
88 | 'abr': bps, | |
add96eb9 | 89 | 'vcodec': 'none', |
dd634acd | 90 | } for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)] |
a90641fe | 91 | |
92 | thumbnails = [] | |
add96eb9 | 93 | for k in audio_info: |
a90641fe | 94 | # cover picture keys look like: 'cover_url', 'cover_url_142' |
95 | if k.startswith('cover_url'): | |
96 | thumbnail = {'name': k, 'url': audio_info[k]} | |
97 | if k == 'cover_url_142': | |
98 | thumbnail['width'] = 180 | |
99 | thumbnail['height'] = 180 | |
100 | thumbnails.append(thumbnail) | |
101 | ||
102 | audio_uploader_id = audio_info.get('uid') | |
103 | ||
dd634acd L |
104 | audio_description = try_call( |
105 | lambda: audio_info['intro'].replace('\r\n\r\n\r\n ', '\n').replace('\r\n', '\n')) | |
a90641fe | 106 | |
107 | return { | |
108 | 'id': audio_id, | |
109 | 'uploader': audio_info.get('nickname'), | |
f4f9f6d0 | 110 | 'uploader_id': str_or_none(audio_uploader_id), |
dd634acd | 111 | 'uploader_url': f'{scheme}://www.ximalaya.com/zhubo/{audio_uploader_id}/' if audio_uploader_id else None, |
a90641fe | 112 | 'title': audio_info['title'], |
113 | 'thumbnails': thumbnails, | |
114 | 'description': audio_description, | |
dd634acd | 115 | 'categories': list(filter(None, [audio_info.get('category_name')])), |
a90641fe | 116 | 'duration': audio_info.get('duration'), |
117 | 'view_count': audio_info.get('play_count'), | |
118 | 'like_count': audio_info.get('favorites_count'), | |
119 | 'formats': formats, | |
120 | } | |
121 | ||
122 | ||
123 | class XimalayaAlbumIE(XimalayaBaseIE): | |
124 | IE_NAME = 'ximalaya:album' | |
125 | IE_DESC = '喜马拉雅FM 专辑' | |
417cdaae | 126 | _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:\d+/)?album/(?P<id>[0-9]+)' |
a90641fe | 127 | _TESTS = [{ |
128 | 'url': 'http://www.ximalaya.com/61425525/album/5534601/', | |
129 | 'info_dict': { | |
130 | 'title': '唐诗三百首(含赏析)', | |
131 | 'id': '5534601', | |
132 | }, | |
dd634acd | 133 | 'playlist_mincount': 323, |
417cdaae CC |
134 | }, { |
135 | 'url': 'https://www.ximalaya.com/album/6912905', | |
136 | 'info_dict': { | |
137 | 'title': '埃克哈特《修炼当下的力量》', | |
138 | 'id': '6912905', | |
139 | }, | |
140 | 'playlist_mincount': 41, | |
dd634acd | 141 | }] |
a90641fe | 142 | |
143 | def _real_extract(self, url): | |
dd634acd | 144 | playlist_id = self._match_id(url) |
a90641fe | 145 | |
dd634acd L |
146 | first_page = self._fetch_page(playlist_id, 1) |
147 | page_count = math.ceil(first_page['trackTotalCount'] / first_page['pageSize']) | |
a90641fe | 148 | |
dd634acd L |
149 | entries = InAdvancePagedList( |
150 | lambda idx: self._get_entries(self._fetch_page(playlist_id, idx + 1) if idx else first_page), | |
151 | page_count, first_page['pageSize']) | |
a90641fe | 152 | |
dd634acd | 153 | title = traverse_obj(first_page, ('tracks', 0, 'albumTitle'), expected_type=str) |
a90641fe | 154 | |
dd634acd | 155 | return self.playlist_result(entries, playlist_id, title) |
a90641fe | 156 | |
dd634acd L |
157 | def _fetch_page(self, playlist_id, page_idx): |
158 | return self._download_json( | |
159 | 'https://www.ximalaya.com/revision/album/v1/getTracksList', | |
160 | playlist_id, note=f'Downloading tracks list page {page_idx}', | |
8790ea7b | 161 | query={'albumId': playlist_id, 'pageNum': page_idx})['data'] |
a90641fe | 162 | |
dd634acd L |
163 | def _get_entries(self, page_data): |
164 | for e in page_data['tracks']: | |
165 | yield self.url_result( | |
166 | self._proto_relative_url(f'//www.ximalaya.com{e["url"]}'), | |
167 | XimalayaIE, e.get('trackId'), e.get('title')) |