]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ( | |
3 | float_or_none, | |
4 | format_field, | |
5 | int_or_none, | |
6 | str_or_none, | |
7 | traverse_obj, | |
8 | parse_codecs, | |
9 | parse_qs, | |
10 | ) | |
11 | ||
12 | ||
class AcFunVideoBaseIE(InfoExtractor):
    """Base class with the format/metadata extraction shared by the AcFun extractors."""

    def _extract_metadata(self, video_id, video_info):
        """Build the part of the info dict that comes from one video-info dict.

        video_id is used for the resulting 'id' and for download/parse messages.
        video_info must contain 'ksPlayJson' (parsed here with _parse_json);
        'durationMillis' and 'uploadTime' are read when present.

        Returns a dict with 'id', 'formats', 'subtitles', 'duration',
        'timestamp' and 'http_headers'.
        """
        playjson = self._parse_json(video_info['ksPlayJson'], video_id)

        formats, subtitles = [], {}
        # The path may be missing from the play JSON; iterate over an empty
        # list in that case instead of failing on a None result.
        representations = traverse_obj(playjson, ('adaptationSet', 0, 'representation')) or []
        for video in representations:
            video_url = video.get('url')
            if not video_url:
                # Manifest extraction below is already non-fatal, so a
                # representation without a URL is skipped rather than fatal.
                continue

            fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

            # Values taken from the representation entry override whatever
            # the manifest extraction filled in for these keys.
            representation_fields = {
                'fps': float_or_none(video.get('frameRate')),
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
                'tbr': float_or_none(video.get('avgBitrate')),
                **parse_codecs(video.get('codecs', '')),
            }
            for f in fmts:
                f.update(representation_fields)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            # Both source fields are passed with a scale of 1000
            # (presumably milliseconds -> seconds, going by the field name).
            'duration': float_or_none(video_info.get('durationMillis'), 1000),
            'timestamp': int_or_none(video_info.get('uploadTime'), 1000),
            'http_headers': {'Referer': 'https://www.acfun.cn/'},
        }
39 | ||
40 | ||
class AcFunVideoIE(AcFunVideoBaseIE):
    """Extractor for regular AcFun video pages (https://www.acfun.cn/v/ac...)."""

    _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'

    _TESTS = [{
        'url': 'https://www.acfun.cn/v/ac35457073',
        'info_dict': {
            'id': '35457073',
            'ext': 'mp4',
            'duration': 174.208,
            'timestamp': 1656403967,
            'title': '1 8 岁 现 状',
            'description': '“赶紧回去!班主任查班了!”',
            'uploader': '锤子game',
            'uploader_id': '51246077',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
            'upload_date': '20220628',
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'tags': list,
        },
    }, {
        # example for len(video_list) > 1
        'url': 'https://www.acfun.cn/v/ac35468952_2',
        'info_dict': {
            'id': '35468952_2',
            'ext': 'mp4',
            'title': '【动画剧集】Rocket & Groot Season 1(2022)/火箭浣熊与格鲁特第1季 P02 S01E02 十拿九穩',
            'duration': 90.459,
            'uploader': '比令',
            'uploader_id': '37259967',
            'upload_date': '20220629',
            'timestamp': 1656479962,
            'tags': list,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
            'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
        }
    }]

    def _real_extract(self, url):
        """Extract formats and metadata from the page's `window.videoInfo` JSON."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        json_all = self._search_json(r'window\.videoInfo\s*=', webpage, 'videoInfo', video_id)

        title = json_all.get('title')
        video_list = json_all.get('videoList') or []
        video_internal_id = traverse_obj(json_all, ('currentVideoInfo', 'id'))
        if video_internal_id and len(video_list) > 1:
            # Multi-part upload: find the 1-based position of the current part.
            # A default is supplied so that a part missing from the list does
            # not escape as a bare StopIteration; the plain title is kept then.
            part_idx, part_video_info = next(
                ((idx, v) for idx, v in enumerate(video_list, 1)
                 if v.get('id') == video_internal_id),
                (None, None))
            if part_video_info is not None:
                title = f'{title} P{part_idx:02d} {part_video_info["title"]}'

        return {
            **self._extract_metadata(video_id, json_all['currentVideoInfo']),
            'title': title,
            'thumbnail': json_all.get('coverUrl'),
            'description': json_all.get('description'),
            'uploader': traverse_obj(json_all, ('user', 'name')),
            'uploader_id': traverse_obj(json_all, ('user', 'href')),
            'tags': traverse_obj(json_all, ('tagList', ..., 'name')),
            'view_count': int_or_none(json_all.get('viewCount')),
            'like_count': int_or_none(json_all.get('likeCountShow')),
            'comment_count': int_or_none(json_all.get('commentCountShow')),
        }
110 | ||
111 | ||
class AcFunBangumiIE(AcFunVideoBaseIE):
    """Extractor for AcFun bangumi pages (https://www.acfun.cn/bangumi/aa...)."""

    _VALID_URL = r'https?://www\.acfun\.cn/bangumi/(?P<id>aa[_\d]+)'

    _TESTS = [{
        'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1745457?ac=2',
        'info_dict': {
            'id': 'aa6002917_36188_1745457__2',
            'ext': 'mp4',
            'title': '【7月】租借女友 水原千鹤角色曲『DATE』特别PV',
            'upload_date': '20200916',
            'timestamp': 1600243813,
            'duration': 92.091,
        },
    }, {
        'url': 'https://www.acfun.cn/bangumi/aa5023171_36188_1750645',
        'info_dict': {
            'id': 'aa5023171_36188_1750645',
            'ext': 'mp4',
            'title': '红孩儿之趴趴蛙寻石记 第5话 ',
            'duration': 760.0,
            'season': '红孩儿之趴趴蛙寻石记',
            'season_id': '5023171',
            'season_number': 1,  # series has only 1 season
            'episode': 'Episode 5',
            'episode_number': 5,
            'upload_date': '20181223',
            'timestamp': 1545552185,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'comment_count': int,
        },
    }, {
        'url': 'https://www.acfun.cn/bangumi/aa6065485_36188_1885061',
        'info_dict': {
            'id': 'aa6065485_36188_1885061',
            'ext': 'mp4',
            'title': '叽歪老表(第二季) 第5话 坚不可摧',
            'season': '叽歪老表(第二季)',
            'season_number': 2,
            'season_id': '6065485',
            'episode': '坚不可摧',
            'episode_number': 5,
            'upload_date': '20220324',
            'timestamp': 1648082786,
            'duration': 105.002,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract an episode, or the `hlVideoInfo` video when the URL has an `ac` query value.

        When `ac` is given, its value is appended to the video ID as `__<ac>`
        and only formats plus the title are returned. Otherwise season and
        episode metadata is read from the page's `window.bangumiData` and
        `window.bangumiList` JSON.
        """
        video_id = self._match_id(url)
        ac_idx = parse_qs(url).get('ac', [None])[-1]
        video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}'

        webpage = self._download_webpage(url, video_id)
        json_bangumi_data = self._search_json(r'window\.bangumiData\s*=', webpage, 'bangumiData', video_id)

        if ac_idx:
            video_info = json_bangumi_data['hlVideoInfo']
            return {
                **self._extract_metadata(video_id, video_info),
                'title': video_info.get('title'),
            }

        video_info = json_bangumi_data['currentVideoInfo']

        # Season number is the 1-based position of this bangumi among the
        # related ones; defaults to 1 when it is not listed there.
        season_id = json_bangumi_data.get('bangumiId')
        season_number = season_id and next((
            idx for idx, v in enumerate(json_bangumi_data.get('relatedBangumis') or [], 1)
            if v.get('id') == season_id), 1)

        # Non-fatal lookup: fall back to an empty dict so the `.get` below is
        # safe even if nothing usable was found on the page.
        json_bangumi_list = self._search_json(
            r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False) or {}
        # Episode number is the 1-based position of the current video in the
        # bangumi list; None when it cannot be located.
        video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
        episode_number = video_internal_id and next((
            idx for idx, v in enumerate(json_bangumi_list.get('items') or [], 1)
            if v.get('videoId') == video_internal_id), None)

        return {
            **self._extract_metadata(video_id, video_info),
            'title': json_bangumi_data.get('showTitle'),
            'thumbnail': json_bangumi_data.get('image'),
            'season': json_bangumi_data.get('bangumiTitle'),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'episode': json_bangumi_data.get('title'),
            'episode_number': episode_number,
            'comment_count': int_or_none(json_bangumi_data.get('commentCount')),
        }