]>
Commit | Line | Data |
---|---|---|
697ebe4d H |
1 | import base64 |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | ExtractorError, | |
6 | get_element_by_id, | |
7 | int_or_none, | |
8 | js_to_json, | |
9 | str_or_none, | |
10 | traverse_obj, | |
11 | ) | |
12 | ||
13 | ||
class IxiguaIE(InfoExtractor):
    """Extractor for videos hosted on ixigua.com."""

    # NOTE: the pattern deliberately has no trailing `.+`. With it, a bare URL
    # such as `https://www.ixigua.com/<digits>` only matched after `\d+`
    # backtracked and gave up its last digit, so the captured id was truncated.
    # No trailing wildcard is needed as long as the pattern is applied with
    # `re.match`, which only anchors at the start of the URL.
    _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.ixigua.com/6996881461559165471',
        'info_dict': {
            'id': '6996881461559165471',
            'ext': 'mp4',
            'title': '盲目涉水风险大,亲身示范高水位行车注意事项',
            'description': 'md5:8c82f46186299add4a1c455430740229',
            'tags': ['video_car'],
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'uploader': '懂车帝原创',
            'uploader_id': '6480145787',
            'thumbnail': r're:^https?://.+\.(avif|webp)',
            'timestamp': 1629088414,
            'duration': 1030,
        }
    }]

    def _get_json_data(self, webpage, video_id):
        """Return the parsed `SSR_HYDRATED_DATA` payload embedded in *webpage*.

        Raises ExtractorError when the element is absent. If cookies were
        already passed the failure is reported as unexpected; otherwise it is
        reported as an expected error asking the user to supply cookies.
        """
        js_data = get_element_by_id('SSR_HYDRATED_DATA', webpage)
        if not js_data:
            if self._cookies_passed:
                raise ExtractorError('Failed to get SSR_HYDRATED_DATA')
            raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True)

        # The element body is a JS assignment; drop the assignment prefix and
        # let js_to_json turn the remaining object literal into valid JSON.
        return self._parse_json(
            js_data.replace('window._SSR_HYDRATED_DATA=', ''), video_id, transform_source=js_to_json)

    def _media_selector(self, json_data):
        """Yield one format dict per media entry found in *json_data*.

        Three lists are inspected: `video_list`, and the `dynamic_video_list`
        and `dynamic_audio_list` found under `dynamic_video`. Entries from the
        latter two are marked as video-only and audio-only respectively via
        the per-path override.
        """
        for path, override in (
            (('video_list', ), {}),
            (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}),
            (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}),
        ):
            # The trailing lambda is a filter on `main_url`, so only entries
            # that carry one are yielded.
            for media in traverse_obj(json_data, (..., *path, lambda _, v: v['main_url'])):
                yield {
                    # `main_url` is base64-encoded in the page data.
                    'url': base64.b64decode(media['main_url']).decode(),
                    'width': int_or_none(media.get('vwidth')),
                    'height': int_or_none(media.get('vheight')),
                    'fps': int_or_none(media.get('fps')),
                    'vcodec': media.get('codec_type'),
                    'format_id': str_or_none(media.get('quality_type')),
                    'filesize': int_or_none(media.get('size')),
                    'ext': 'mp4',
                    # Applied last so the per-path values win over the defaults above.
                    **override,
                }

    def _real_extract(self, url):
        """Download the page for *url* and build the info dict for its video."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']

        formats = list(self._media_selector(json_data.get('videoResource')))
        self._sort_formats(formats)

        # Avoid emitting `[None]` when the page carries no tag.
        tag = json_data.get('tag')

        return {
            'id': video_id,
            'title': json_data.get('title'),
            'description': json_data.get('video_abstract'),
            'formats': formats,
            'like_count': json_data.get('video_like_count'),
            'duration': int_or_none(json_data.get('duration')),
            'tags': [tag] if tag else [],
            'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')),
            'uploader': traverse_obj(json_data, ('user_info', 'name')),
            'view_count': json_data.get('video_watch_count'),
            'dislike_count': json_data.get('video_unlike_count'),
            'timestamp': int_or_none(json_data.get('video_publish_time')),
        }