]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ( | |
3 | float_or_none, | |
4 | format_field, | |
5 | int_or_none, | |
6 | traverse_obj, | |
7 | parse_codecs, | |
8 | parse_qs, | |
9 | ) | |
10 | ||
11 | ||
class AcFunVideoBaseIE(InfoExtractor):
    """Base class holding the format/metadata extraction shared by the AcFun extractors."""

    def _extract_metadata(self, video_id, video_info):
        """Build the playback-related part of an info dict.

        @param video_id     ID used for logging/requests and returned as 'id'
        @param video_info   dict carrying at least 'ksPlayJson' (a JSON string);
                            'durationMillis' and 'uploadTime' are read if present
        @returns            dict with 'id', 'formats', 'subtitles', 'duration',
                            'timestamp' and 'http_headers'

        A missing 'ksPlayJson' key raises KeyError, as nothing can be
        extracted without it.
        """
        playjson = self._parse_json(video_info['ksPlayJson'], video_id)

        formats, subtitles = [], {}
        # traverse_obj gives None when the path is absent; iterate an empty list instead
        representations = traverse_obj(playjson, ('adaptationSet', 0, 'representation')) or []
        for video in representations:
            video_url = video.get('url')
            if not video_url:
                # Nothing to download for this representation; the m3u8
                # extraction below is non-fatal anyway, so just skip it
                continue

            fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
            self._merge_subtitles(subs, target=subtitles)

            # The representation-level fields apply to every format of its playlist
            representation_fields = {
                'fps': float_or_none(video.get('frameRate')),
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
                'tbr': float_or_none(video.get('avgBitrate')),
                **parse_codecs(video.get('codecs', '')),
            }
            for f in fmts:
                f.update(representation_fields)
            formats.extend(fmts)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            # Both values are passed with a scale of 1000 (source fields are named/used as milliseconds)
            'duration': float_or_none(video_info.get('durationMillis'), 1000),
            'timestamp': int_or_none(video_info.get('uploadTime'), 1000),
            'http_headers': {'Referer': 'https://www.acfun.cn/'},
        }
38 | ||
39 | ||
class AcFunVideoIE(AcFunVideoBaseIE):
    """Extractor for regular AcFun video pages (https://www.acfun.cn/v/ac<id>)."""

    _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'

    _TESTS = [{
        'url': 'https://www.acfun.cn/v/ac35457073',
        'info_dict': {
            'id': '35457073',
            'ext': 'mp4',
            'duration': 174.208,
            'timestamp': 1656403967,
            'title': '1 8 岁 现 状',
            'description': '“赶紧回去!班主任查班了!”',
            'uploader': '锤子game',
            'uploader_id': '51246077',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
            'upload_date': '20220628',
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'tags': list,
        },
    }, {
        # example for len(video_list) > 1
        'url': 'https://www.acfun.cn/v/ac35468952_2',
        'info_dict': {
            'id': '35468952_2',
            'ext': 'mp4',
            'title': '【动画剧集】Rocket & Groot Season 1(2022)/火箭浣熊与格鲁特第1季 P02 S01E02 十拿九穩',
            'duration': 90.459,
            'uploader': '比令',
            'uploader_id': '37259967',
            'upload_date': '20220629',
            'timestamp': 1656479962,
            'tags': list,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
            'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
        }
    }]

    def _real_extract(self, url):
        """Extract a single video (or one part of a multi-part video) from its page.

        The page embeds a `window.videoInfo = {...}` JSON object; playback data
        comes from its 'currentVideoInfo' entry, the remaining fields from the
        top-level object.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        json_all = self._search_json(r'window\.videoInfo\s*=', webpage, 'videoInfo', video_id)

        title = json_all.get('title')
        video_list = json_all.get('videoList') or []
        video_internal_id = traverse_obj(json_all, ('currentVideoInfo', 'id'))
        if video_internal_id and len(video_list) > 1:
            # Multi-part upload: append "P<nn> <part title>" for the part being played.
            # A default is supplied so an unmatched ID does not leak StopIteration.
            part_idx, part_video_info = next((
                (idx, v) for idx, v in enumerate(video_list, 1)
                if v.get('id') == video_internal_id), (None, None))
            if part_idx:
                part_title = part_video_info.get('title')
                title = f'{title} P{part_idx:02d}'
                if part_title is not None:
                    title = f'{title} {part_title}'

        return {
            **self._extract_metadata(video_id, json_all['currentVideoInfo']),
            'title': title,
            'thumbnail': json_all.get('coverUrl'),
            'description': json_all.get('description'),
            'uploader': traverse_obj(json_all, ('user', 'name')),
            # 'href' holds the value reported as the uploader ID (see _TESTS)
            'uploader_id': traverse_obj(json_all, ('user', 'href')),
            'tags': traverse_obj(json_all, ('tagList', ..., 'name')),
            'view_count': int_or_none(json_all.get('viewCount')),
            'like_count': int_or_none(json_all.get('likeCountShow')),
            'comment_count': int_or_none(json_all.get('commentCountShow')),
        }
109 | ||
110 | ||
class AcFunBangumiIE(AcFunVideoBaseIE):
    """Extractor for AcFun bangumi pages (https://www.acfun.cn/bangumi/aa<id>)."""

    _VALID_URL = r'https?://www\.acfun\.cn/bangumi/(?P<id>aa[_\d]+)'

    _TESTS = [{
        'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1745457?ac=2',
        'info_dict': {
            'id': 'aa6002917_36188_1745457__2',
            'ext': 'mp4',
            'title': '【7月】租借女友 水原千鹤角色曲『DATE』特别PV',
            'upload_date': '20200916',
            'timestamp': 1600243813,
            'duration': 92.091,
        },
    }, {
        'url': 'https://www.acfun.cn/bangumi/aa5023171_36188_1750645',
        'info_dict': {
            'id': 'aa5023171_36188_1750645',
            'ext': 'mp4',
            'title': '红孩儿之趴趴蛙寻石记 第5话 ',
            'duration': 760.0,
            'season': '红孩儿之趴趴蛙寻石记',
            'season_id': 5023171,
            'season_number': 1,  # series has only 1 season
            'episode': 'Episode 5',
            'episode_number': 5,
            'upload_date': '20181223',
            'timestamp': 1545552185,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'comment_count': int,
        },
    }, {
        'url': 'https://www.acfun.cn/bangumi/aa6065485_36188_1885061',
        'info_dict': {
            'id': 'aa6065485_36188_1885061',
            'ext': 'mp4',
            'title': '叽歪老表(第二季) 第5话 坚不可摧',
            'season': '叽歪老表(第二季)',
            'season_number': 2,
            'season_id': 6065485,
            'episode': '坚不可摧',
            'episode_number': 5,
            'upload_date': '20220324',
            'timestamp': 1648082786,
            'duration': 105.002,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a bangumi episode, or the highlighted clip when `?ac=<n>` is given.

        The page embeds `window.bangumiData = {...}` (required) and
        `window.bangumiList = {...}` (optional, only used to derive the
        episode number).
        """
        video_id = self._match_id(url)
        # With an `ac` query parameter the ID gets a "__<ac>" suffix
        ac_idx = parse_qs(url).get('ac', [None])[-1]
        video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}'

        webpage = self._download_webpage(url, video_id)
        json_bangumi_data = self._search_json(r'window\.bangumiData\s*=', webpage, 'bangumiData', video_id)

        if ac_idx:
            # `ac` selects the entry stored under 'hlVideoInfo'; no season/episode data is attached to it
            video_info = json_bangumi_data['hlVideoInfo']
            return {
                **self._extract_metadata(video_id, video_info),
                'title': video_info.get('title'),
            }

        video_info = json_bangumi_data['currentVideoInfo']

        # Season number = 1-based position of this bangumi among the related ones, defaulting to 1
        season_id = json_bangumi_data.get('bangumiId')
        season_number = season_id and next((
            idx for idx, v in enumerate(json_bangumi_data.get('relatedBangumis') or [], 1)
            if v.get('id') == season_id), 1)

        # The list lookup is non-fatal, so its result may be empty/None; never dereference it blindly
        json_bangumi_list = self._search_json(
            r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False)
        episode_items = (json_bangumi_list or {}).get('items') or []
        video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
        # Episode number = 1-based position of the current video in the episode list, if found
        episode_number = video_internal_id and next((
            idx for idx, v in enumerate(episode_items, 1)
            if v.get('videoId') == video_internal_id), None)

        return {
            **self._extract_metadata(video_id, video_info),
            'title': json_bangumi_data.get('showTitle'),
            'thumbnail': json_bangumi_data.get('image'),
            'season': json_bangumi_data.get('bangumiTitle'),
            'season_id': season_id,
            'season_number': season_number,
            'episode': json_bangumi_data.get('title'),
            'episode_number': episode_number,
            'comment_count': int_or_none(json_bangumi_data.get('commentCount')),
        }