]>
Commit | Line | Data |
---|---|---|
1ead840d KS |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
f7f18f90 A |
3 | |
4 | import itertools | |
1ead840d KS |
5 | |
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
ce18a19b | 8 | ExtractorError, |
1ead840d KS |
9 | int_or_none, |
10 | str_or_none, | |
4b6d03ed | 11 | try_get |
1ead840d KS |
12 | ) |
13 | ||
14 | ||
class TikTokIE(InfoExtractor):
    """Extractor for a single TikTok video page (``/@<user>/video/<id>``)."""

    _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
        'md5': '34a7543afd5a151b0840ba6736fb633b',
        'info_dict': {
            'id': '6748451240264420610',
            'ext': 'mp4',
            'title': '#jassmanak #lehanga #leenabhushan',
            'description': '#jassmanak #lehanga #leenabhushan',
            'duration': 13,
            'height': 1280,
            'width': 720,
            'uploader': 'leenabhushan',
            'uploader_id': '6691488002098119685',
            'uploader_url': 'https://www.tiktok.com/@leenabhushan',
            'creator': 'facestoriesbyleenabh',
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'upload_date': '20191016',
            'timestamp': 1571246252,
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
        'md5': '06b9800d47d5fe51a19e322dd86e61c9',
        'info_dict': {
            'id': '6742501081818877190',
            'ext': 'mp4',
            'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'duration': 27,
            'height': 960,
            'width': 540,
            'uploader': 'patrox',
            'uploader_id': '18702747',
            'uploader_url': 'https://www.tiktok.com/@patrox',
            'creator': 'patroX',
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'upload_date': '20190930',
            'timestamp': 1569860870,
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }]

    def _extract_aweme(self, props_data, webpage, url):
        """Build the info dict for one video from the page's ``props`` JSON.

        props_data -- the ``props`` object parsed from the ``__NEXT_DATA__``
                      script of the video page
        webpage    -- HTML of the video page, used for the OpenGraph fallbacks
        url        -- the video page URL, sent as ``Referer`` with the download

        Raises ExtractorError when the JSON carries no
        ``pageProps.itemInfo.itemStruct`` object.
        """
        video_info = try_get(
            props_data, lambda x: x['pageProps']['itemInfo']['itemStruct'], dict)
        if not video_info:
            # Without the item structure there is neither an id nor a media
            # URL; fail with a clear message instead of an AttributeError.
            raise ExtractorError('Unable to extract video data')
        author_info = try_get(video_info, lambda x: x['author'], dict) or {}
        stats_info = try_get(video_info, lambda x: x['stats'], dict) or {}

        user_id = str_or_none(author_info.get('uniqueId'))
        download_url = try_get(video_info, (lambda x: x['video']['playAddr'],
                                            lambda x: x['video']['downloadAddr']))
        height = try_get(video_info, lambda x: x['video']['height'], int)
        width = try_get(video_info, lambda x: x['video']['width'], int)

        # Only emit a thumbnail entry when a URL was actually found.
        thumbnail_url = video_info.get('thumbnail') or self._og_search_thumbnail(webpage)
        thumbnails = [{
            'url': thumbnail_url,
            'width': width,
            'height': height,
        }] if thumbnail_url else []

        # The '$wid' value is echoed back as the tt_webid cookies; skip the
        # Cookie header entirely when it is absent rather than sending "None".
        tracker = try_get(props_data, lambda x: x['initialProps']['$wid'])
        http_headers = {'Referer': url}
        if tracker:
            http_headers['Cookie'] = 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker)

        return {
            'id': str_or_none(video_info.get('id')),
            'url': download_url,
            'ext': 'mp4',
            'height': height,
            'width': width,
            'title': video_info.get('desc') or self._og_search_title(webpage),
            'duration': try_get(video_info, lambda x: x['video']['duration'], int),
            'view_count': int_or_none(stats_info.get('playCount')),
            'like_count': int_or_none(stats_info.get('diggCount')),
            'repost_count': int_or_none(stats_info.get('shareCount')),
            'comment_count': int_or_none(stats_info.get('commentCount')),
            # int_or_none also tolerates non-numeric strings.
            'timestamp': int_or_none(video_info.get('createTime')),
            'creator': str_or_none(author_info.get('nickname')),
            'uploader': user_id,
            'uploader_id': str_or_none(author_info.get('id')),
            # Avoid producing ".../@None" when the unique id is unknown.
            'uploader_url': f'https://www.tiktok.com/@{user_id}' if user_id else None,
            'thumbnails': thumbnails,
            'description': str_or_none(video_info.get('desc')),
            'webpage_url': self._og_search_url(webpage),
            'http_headers': http_headers,
        }

    def _real_extract(self, url):
        """Download the video page and return its info dict.

        Raises ExtractorError when the page reports the video as private
        (status code 10216) or reports any status other than 0.
        """
        video_id = self._match_id(url)

        # The page is requested twice on purpose: if we only call once, we
        # get a 403 when downloading the video.
        self._download_webpage(url, video_id)
        webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
        json_string = self._search_regex(
            r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
            webpage, 'json_string', group='json_string_ld')
        json_data = self._parse_json(json_string, video_id)
        props_data = try_get(json_data, lambda x: x['props'], expected_type=dict) or {}

        # Check statusCode for success. A missing 'props'/'pageProps' yields
        # None here and falls through to the generic error below.
        status = try_get(props_data, lambda x: x['pageProps']['statusCode'])
        if status == 0:
            return self._extract_aweme(props_data, webpage, url)
        elif status == 10216:
            raise ExtractorError('This video is private', expected=True)

        raise ExtractorError('Video not available', video_id=video_id)
f7f18f90 A |
131 | |
132 | ||
class TikTokUserIE(InfoExtractor):
    """Extractor for all videos on a TikTok user page (``/@<user>``)."""

    IE_NAME = 'tiktok:user'
    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\._]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://tiktok.com/@corgibobaa?lang=en',
        'playlist_mincount': 45,
        'info_dict': {
            'id': '6935371178089399301',
        },
        'skip': 'Cookies (not necessarily logged in) are needed.'
    }, {
        'url': 'https://www.tiktok.com/@meme',
        'playlist_mincount': 593,
        'info_dict': {
            'id': '79005827461758976',
        },
        'skip': 'Cookies (not necessarily logged in) are needed.'
    }]

    def _entries(self, url, user_id):
        """Yield one info dict per video of the user, page by page.

        url     -- the user page URL
        user_id -- the user name matched from the URL; used for progress
                   notes and to build the per-video page URLs

        Raises ExtractorError when the user page exposes no numeric id or
        when the ``s_v_web_id`` cookie is missing.
        """
        webpage = self._download_webpage(url, user_id)
        # The numeric id is only used as a marker that the page was served
        # properly; the third argument of _search_regex is the field name
        # shown in messages, not the video/user id.
        own_id = self._search_regex(
            r'\"id\":\"(?P<userid>\d+)', webpage, 'user id', default=None)
        if not own_id:
            raise ExtractorError('Cookies (not necessarily logged in) are needed.', expected=True)
        secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, 'secUid')
        verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
        if not verifyfp_cookie:
            raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
        api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
        cursor = '0'
        for page in itertools.count():
            data_json = self._download_json(api_url + cursor, user_id, note='Downloading Page %d' % page)
            # 'itemList' may be present but null; treat that like an empty page.
            for video in data_json.get('itemList') or []:
                video_id = video['id']
                video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
                download_url = try_get(video, (lambda x: x['video']['playAddr'],
                                               lambda x: x['video']['downloadAddr']))
                thumbnail = try_get(video, lambda x: x['video']['originCover'])
                height = try_get(video, lambda x: x['video']['height'], int)
                width = try_get(video, lambda x: x['video']['width'], int)
                yield {
                    'id': video_id,
                    'ie_key': TikTokIE.ie_key(),
                    'extractor': 'TikTok',
                    'url': download_url,
                    'ext': 'mp4',
                    'height': height,
                    'width': width,
                    'title': str_or_none(video.get('desc')),
                    'duration': try_get(video, lambda x: x['video']['duration'], int),
                    'view_count': try_get(video, lambda x: x['stats']['playCount'], int),
                    'like_count': try_get(video, lambda x: x['stats']['diggCount'], int),
                    'comment_count': try_get(video, lambda x: x['stats']['commentCount'], int),
                    'repost_count': try_get(video, lambda x: x['stats']['shareCount'], int),
                    # Sanitised to an int, consistent with TikTokIE.
                    'timestamp': int_or_none(video.get('createTime')),
                    'creator': try_get(video, lambda x: x['author']['nickname'], str),
                    'uploader': try_get(video, lambda x: x['author']['uniqueId'], str),
                    'uploader_id': try_get(video, lambda x: x['author']['id'], str),
                    'uploader_url': f'https://www.tiktok.com/@{user_id}',
                    # Only emit a thumbnail entry when a URL was actually found.
                    'thumbnails': [{'url': thumbnail, 'height': height, 'width': width}] if thumbnail else [],
                    'description': str_or_none(video.get('desc')),
                    'webpage_url': video_url,
                    'http_headers': {
                        'Referer': video_url,
                    }
                }
            if not data_json.get('hasMore'):
                break
            # The cursor is appended to the URL, so it must be a string;
            # stop paging if the API did not return one.
            cursor = str_or_none(data_json.get('cursor'))
            if cursor is None:
                break

    def _real_extract(self, url):
        """Return a playlist of the user's videos, identified by the user name from the URL."""
        user_id = self._match_id(url)
        return self.playlist_result(self._entries(url, user_id), user_id)