2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
15 class TikTokIE(InfoExtractor
):
# Single-video page URL; the numeric video id is captured in the ``id`` group.
_VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'
19 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
20 'md5': '34a7543afd5a151b0840ba6736fb633b',
22 'id': '6748451240264420610',
24 'title': '#jassmanak #lehanga #leenabhushan',
25 'description': '#jassmanak #lehanga #leenabhushan',
29 'uploader': 'leenabhushan',
30 'uploader_id': '6691488002098119685',
31 'uploader_url': 'https://www.tiktok.com/@leenabhushan',
32 'creator': 'facestoriesbyleenabh',
33 'thumbnail': r
're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
34 'upload_date': '20191016',
35 'timestamp': 1571246252,
42 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
43 'md5': '06b9800d47d5fe51a19e322dd86e61c9',
45 'id': '6742501081818877190',
47 'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
48 'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
53 'uploader_id': '18702747',
54 'uploader_url': 'https://www.tiktok.com/@patrox',
56 'thumbnail': r
're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
57 'upload_date': '20190930',
58 'timestamp': 1569860870,
def _extract_aweme(self, props_data, webpage, url):
    """Build the info dict for one video from the page's ``props`` JSON.

    Args:
        props_data: The ``props`` object parsed from the JSON embedded in
            the video page.
        webpage: HTML of the video page; used for the Open Graph fallbacks
            (thumbnail, title, canonical URL).
        url: URL of the video page; sent as ``Referer`` with the download.

    Returns:
        A dict with the video id, media URL, metadata and the HTTP headers
        to use when downloading the media.
    """
    # Every lookup falls back to an empty dict so that a page without the
    # expected structure produces missing fields rather than an
    # AttributeError on ``None``.
    video_info = try_get(
        props_data, lambda x: x['pageProps']['itemInfo']['itemStruct'], dict) or {}
    author_info = try_get(video_info, lambda x: x['author'], dict) or {}
    stats_info = try_get(video_info, lambda x: x['stats'], dict) or {}

    user_id = str_or_none(author_info.get('uniqueId'))
    # Prefer the play address, fall back to the download address.
    download_url = try_get(video_info, (lambda x: x['video']['playAddr'],
                                        lambda x: x['video']['downloadAddr']))
    height = try_get(video_info, lambda x: x['video']['height'], int)
    width = try_get(video_info, lambda x: x['video']['width'], int)
    thumbnails = [{
        'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage),
        'width': width,
        'height': height,
    }]
    tracker = try_get(props_data, lambda x: x['initialProps']['$wid'])

    return {
        'id': str_or_none(video_info.get('id')),
        'url': download_url,
        'ext': 'mp4',
        'height': height,
        'width': width,
        'title': video_info.get('desc') or self._og_search_title(webpage),
        'duration': try_get(video_info, lambda x: x['video']['duration'], int),
        'view_count': int_or_none(stats_info.get('playCount')),
        'like_count': int_or_none(stats_info.get('diggCount')),
        'repost_count': int_or_none(stats_info.get('shareCount')),
        'comment_count': int_or_none(stats_info.get('commentCount')),
        'timestamp': try_get(video_info, lambda x: int(x['createTime']), int),
        'creator': str_or_none(author_info.get('nickname')),
        'uploader': user_id,
        'uploader_id': str_or_none(author_info.get('id')),
        # Without a user name there is no profile URL to report.
        'uploader_url': f'https://www.tiktok.com/@{user_id}' if user_id else None,
        'thumbnails': thumbnails,
        'description': str_or_none(video_info.get('desc')),
        'webpage_url': self._og_search_url(webpage),
        'http_headers': {
            'Referer': url,
            'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker),
        }
    }
def _real_extract(self, url):
    """Download the video page and extract the video it describes.

    Args:
        url: URL of the video page.

    Returns:
        The info dict built by ``_extract_aweme``.

    Raises:
        ExtractorError: If the page reports status 10216 (private video),
            or any other status than 0, including a page whose embedded
            JSON carries no status at all.
    """
    video_id = self._match_id(url)

    # If we only call once, we get a 403 when downloading the video.
    self._download_webpage(url, video_id)
    webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
    json_string = self._search_regex(
        r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
        webpage, 'json_string', group='json_string_ld')
    json_data = self._parse_json(json_string, video_id)
    props_data = try_get(json_data, lambda x: x['props'], expected_type=dict) or {}

    # Check statusCode for success. The lookup goes through try_get so that
    # a missing 'props' or 'pageProps' ends in the ExtractorError below
    # instead of an AttributeError.
    status = try_get(props_data, lambda x: x['pageProps']['statusCode'])
    if status == 0:
        return self._extract_aweme(props_data, webpage, url)
    elif status == 10216:
        raise ExtractorError('This video is private', expected=True)

    raise ExtractorError('Video not available', video_id=video_id)
133 class TikTokUserIE(InfoExtractor
):
# Extractor name for user (profile) pages.
IE_NAME = 'tiktok:user'
# Profile page URL, with or without the "www." prefix; the user name is
# captured in the ``id`` group.
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\._]+)/?(?:$|[#?])'
137 'url': 'https://tiktok.com/@corgibobaa?lang=en',
138 'playlist_mincount': 45,
140 'id': '6935371178089399301',
142 'skip': 'Cookies (not necessarily logged in) are needed.'
144 'url': 'https://www.tiktok.com/@meme',
145 'playlist_mincount': 593,
147 'id': '79005827461758976',
149 'skip': 'Cookies (not necessarily logged in) are needed.'
def _entries(self, url, user_id):
    """Yield one info dict per video listed for the user.

    Pages through the ``item_list`` API until it reports no more items.
    The request needs the ``s_v_web_id`` cookie for www.tiktok.com.

    Args:
        url: URL of the user's profile page.
        user_id: User name taken from the URL.

    Raises:
        ExtractorError: If no numeric id is found in the profile page or
            the ``s_v_web_id`` cookie is missing.
    """
    webpage = self._download_webpage(url, user_id)
    # The id itself is not used further; its absence signals that the page
    # was served without the needed cookies.
    own_id = self._search_regex(r'\"id\":\"(?P<userid>\d+)', webpage, user_id, default=None)
    if not own_id:
        raise ExtractorError('Cookies (not necessarily logged in) are needed.', expected=True)
    secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, user_id)
    verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
    if not verifyfp_cookie:
        raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
    api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
    # NOTE(review): the first page is assumed to start at cursor '0' - confirm.
    cursor = '0'
    for page in itertools.count():
        data_json = self._download_json(api_url + cursor, user_id, note='Downloading Page %d' % page)
        # ``or []`` also covers an explicit JSON null for 'itemList'.
        for video in data_json.get('itemList') or []:
            video_id = video['id']
            video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
            download_url = try_get(video, (lambda x: x['video']['playAddr'],
                                           lambda x: x['video']['downloadAddr']))
            thumbnail = try_get(video, lambda x: x['video']['originCover'])
            height = try_get(video, lambda x: x['video']['height'], int)
            width = try_get(video, lambda x: x['video']['width'], int)
            yield {
                'id': video_id,
                'ie_key': TikTokIE.ie_key(),
                'extractor': 'TikTok',
                'url': download_url,
                'ext': 'mp4',
                'height': height,
                'width': width,
                'title': str_or_none(video.get('desc')),
                'duration': try_get(video, lambda x: x['video']['duration'], int),
                'view_count': try_get(video, lambda x: x['stats']['playCount'], int),
                'like_count': try_get(video, lambda x: x['stats']['diggCount'], int),
                'comment_count': try_get(video, lambda x: x['stats']['commentCount'], int),
                'repost_count': try_get(video, lambda x: x['stats']['shareCount'], int),
                # Coerced to int, as the single-video extractor does.
                'timestamp': int_or_none(video.get('createTime')),
                'creator': try_get(video, lambda x: x['author']['nickname'], str),
                'uploader': try_get(video, lambda x: x['author']['uniqueId'], str),
                'uploader_id': try_get(video, lambda x: x['author']['id'], str),
                'uploader_url': f'https://www.tiktok.com/@{user_id}',
                'thumbnails': [{'url': thumbnail, 'height': height, 'width': width}],
                'description': str_or_none(video.get('desc')),
                'webpage_url': video_url,
                'http_headers': {
                    'Referer': video_url,
                }
            }
        if not data_json.get('hasMore'):
            break
        # The cursor is appended to the URL string, so make sure it is text
        # even if the API returns it as a number.
        cursor = str(data_json['cursor'])
def _real_extract(self, url):
    """Return a playlist of the videos found for the user in ``url``."""
    uploader_name = self._match_id(url)
    entries = self._entries(url, uploader_name)
    # The user name taken from the URL doubles as the playlist id.
    return self.playlist_result(entries, uploader_name)