]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | compat_str, | |
7 | ExtractorError, | |
8 | int_or_none, | |
9 | str_or_none, | |
10 | try_get, | |
11 | url_or_none, | |
12 | ) | |
13 | ||
14 | ||
class TikTokBaseIE(InfoExtractor):
    """Base class holding the aweme-to-info-dict conversion shared by the
    TikTok extractors in this module."""

    def _extract_aweme(self, data):
        """Build an info dict from one aweme (video) JSON object.

        ``data`` is indexed directly for ``video`` and ``aweme_id``, so a
        dict lacking either key raises ``KeyError``.  All remaining fields
        are read through the tolerant helpers (``try_get``, ``int_or_none``,
        ``str_or_none``, ``url_or_none``) and are treated as optional.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``timestamp``, ``comment_count``,
        ``repost_count`` and ``formats``.
        """
        video = data['video']
        description = str_or_none(try_get(data, lambda x: x['desc']))
        # Dimensions live on the video object and are applied to every
        # format below.
        width = int_or_none(try_get(video, lambda x: x['width']))
        height = int_or_none(try_get(video, lambda x: x['height']))

        # The same URL may be listed under several address keys; keep only
        # the first occurrence of each.
        seen_urls = set()
        formats = []
        for format_id in (
                'play_addr_lowbr', 'play_addr', 'play_addr_h264',
                'download_addr'):
            url_list = try_get(
                video, lambda x: x[format_id]['url_list'], list) or []
            for candidate in url_list:
                format_url = url_or_none(candidate)
                if not format_url or format_url in seen_urls:
                    continue
                seen_urls.add(format_url)
                formats.append({
                    'url': format_url,
                    'ext': 'mp4',
                    'height': height,
                    'width': width,
                })
        self._sort_formats(formats)

        thumbnail = url_or_none(try_get(
            video, lambda x: x['cover']['url_list'][0], compat_str))
        uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
        timestamp = int_or_none(data.get('create_time'))
        # Prefer the top-level counter; a missing or zero value falls back
        # to the one nested under 'statistics'.
        comment_count = int_or_none(data.get('comment_count')) or int_or_none(
            try_get(data, lambda x: x['statistics']['comment_count']))
        repost_count = int_or_none(try_get(
            data, lambda x: x['statistics']['share_count']))

        aweme_id = data['aweme_id']

        return {
            'id': aweme_id,
            # The uploader nickname doubles as the title; the id is used
            # when no nickname is available.
            'title': uploader or aweme_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'timestamp': timestamp,
            'comment_count': comment_count,
            'repost_count': repost_count,
            'formats': formats,
        }
65 | ||
66 | ||
class TikTokIE(TikTokBaseIE):
    """Extractor for a single TikTok video URL."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:m\.)?tiktok\.com/v|
                            (?:www\.)?tiktok\.com/share/video
                        )
                        /(?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://m.tiktok.com/v/6606727368545406213.html',
        'md5': 'd584b572e92fcd48888051f238022420',
        'info_dict': {
            'id': '6606727368545406213',
            'ext': 'mp4',
            'title': 'Zureeal',
            'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
            'thumbnail': r're:^https?://.*~noop.image',
            'uploader': 'Zureeal',
            'timestamp': 1538248586,
            'upload_date': '20180929',
            'comment_count': int,
            'repost_count': int,
        },
    }, {
        'url': 'https://www.tiktok.com/share/video/6606727368545406213',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the mobile video page and convert its embedded
        ``data = {...};`` JSON object into an info dict."""
        video_id = self._match_id(url)
        # Every accepted URL form is fetched through the m.tiktok.com page.
        page_url = 'https://m.tiktok.com/v/%s.html' % video_id
        webpage = self._download_webpage(page_url, video_id)
        raw_data = self._search_regex(
            r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data')
        aweme = self._parse_json(raw_data, video_id)
        return self._extract_aweme(aweme)
103 | ||
104 | ||
class TikTokUserIE(TikTokBaseIE):
    """Extractor for a TikTok user URL; yields the user's videos as a
    playlist."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:m\.)?tiktok\.com/h5/share/usr|
                            (?:www\.)?tiktok\.com/share/user
                        )
                        /(?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
        'info_dict': {
            'id': '188294915489964032',
        },
        'playlist_mincount': 24,
    }, {
        'url': 'https://www.tiktok.com/share/user/188294915489964032',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the user's aweme list and return it as a playlist result
        whose id is the user id."""
        user_id = self._match_id(url)
        list_url = 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id
        listing = self._download_json(
            list_url, user_id, query={'_signature': '_'})

        entries = []
        for aweme in listing['aweme_list']:
            try:
                info = self._extract_aweme(aweme)
            except ExtractorError:
                # Skip entries whose extraction fails instead of aborting
                # the whole playlist.
                continue
            else:
                # Attribute each entry to the single-video extractor.
                info['extractor_key'] = TikTokIE.ie_key()
                entries.append(info)
        return self.playlist_result(entries, user_id)