]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/tiktok.py
[test/download] Fallback test to `bv`
[yt-dlp.git] / yt_dlp / extractor / tiktok.py
CommitLineData
1ead840d
KS
1# coding: utf-8
2from __future__ import unicode_literals
f7f18f90
A
3
4import itertools
bd9ff55b
M
5import random
6import string
7import time
0fd6661e 8import json
1ead840d
KS
9
10from .common import InfoExtractor
943d5ab1 11from ..compat import compat_urllib_parse_unquote
1ead840d 12from ..utils import (
ce18a19b 13 ExtractorError,
1ead840d
KS
14 int_or_none,
15 str_or_none,
bd9ff55b
M
16 traverse_obj,
17 try_get,
943d5ab1 18 url_or_none,
bd9ff55b 19 qualities,
1ead840d
KS
20)
21
22
class TikTokBaseIE(InfoExtractor):
    """Shared mobile-API access and metadata parsing for the extractors in this file.

    Subclasses override the class constants below to target a different app
    build, API host or uploader URL layout.
    """
    _APP_VERSION = '20.9.3'
    _MANIFEST_APP_VERSION = '291'
    _APP_NAME = 'trill'
    _AID = 1180
    _API_HOSTNAME = 'api-t2.tiktokv.com'
    _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
    # Resolution labels in ascending order; used to rank formats by quality
    QUALITIES = ('360p', '540p', '720p')

    def _call_api(self, ep, query, video_id, fatal=True,
                  note='Downloading API JSON', errnote='Unable to download API page'):
        """Request ``/aweme/v1/<ep>/`` on ``_API_HOSTNAME`` and return the decoded JSON.

        ``query`` is merged with a fixed set of device/app parameters; the fixed
        parameters win on key collisions because they are listed after ``**query``.
        ``fatal``, ``note`` and ``errnote`` are forwarded to ``_download_json``.
        """
        real_query = {
            **query,
            'version_name': self._APP_VERSION,
            'version_code': self._MANIFEST_APP_VERSION,
            'build_number': self._APP_VERSION,
            'manifest_version_code': self._MANIFEST_APP_VERSION,
            'update_version_code': self._MANIFEST_APP_VERSION,
            'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)),
            'uuid': ''.join([random.choice(string.digits) for _ in range(16)]),
            '_rticket': int(time.time() * 1000),
            'ts': int(time.time()),
            'device_brand': 'Google',
            'device_type': 'Pixel 4',
            'device_platform': 'android',
            'resolution': '1080*1920',
            'dpi': 420,
            'os_version': '10',
            'os_api': '29',
            'carrier_region': 'US',
            'sys_region': 'US',
            'region': 'US',
            'app_name': self._APP_NAME,
            'app_language': 'en',
            'language': 'en',
            'timezone_name': 'America/New_York',
            'timezone_offset': '-14400',
            'channel': 'googleplay',
            'ac': 'wifi',
            'mcc_mnc': '310260',
            'is_my_cn': 0,
            'aid': self._AID,
            'ssmix': 'a',
            'as': 'a1qwert123',
            'cp': 'cbfhckdckkde1',
        }
        # A fresh random 160-hex-digit 'odin_tt' cookie is set before every request
        self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
        return self._download_json(
            'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
            fatal=fatal, note=note, errnote=errnote, headers={
                'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
                'Accept': 'application/json',
            }, query=real_query)

    def _parse_aweme_video_app(self, aweme_detail):
        """Build an info dict from an ``aweme_detail`` object returned by the app API.

        ``aweme_detail`` must contain ``aweme_id``, ``video`` and ``desc``
        (a missing one raises ``KeyError``); every other field is optional.
        """
        aweme_id = aweme_detail['aweme_id']
        video_info = aweme_detail['video']

        def parse_url_key(url_key):
            """Return ``(format metadata, resolution label)`` parsed from a url_key.

            Yields ``({}, None)`` when the key does not match the expected layout.
            """
            format_id, codec, res, bitrate = self._search_regex(
                r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
                'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
            if not format_id:
                return {}, None
            return {
                'format_id': format_id,
                'vcodec': 'h265' if codec == 'bytevc1' else codec,
                'tbr': int_or_none(bitrate, scale=1000) or None,
                'quality': qualities(self.QUALITIES)(res),
            }, res

        # Dimensions first seen for each resolution label, shared across all addresses
        known_resolutions = {}

        def extract_addr(addr, add_meta=None):
            """Return one format dict per URL in ``addr['url_list']``.

            ``add_meta`` supplies caller-side defaults; values parsed from the
            url_key override them. The mapping is copied so that neither the
            caller's dict nor a shared default is ever mutated.
            """
            add_meta = dict(add_meta or {})
            parsed_meta, res = parse_url_key(addr.get('url_key', ''))
            if res:
                # NOTE(review): the first caller for a given resolution fixes its
                # width/height, even if those values are None - confirm intended.
                known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height'))
                known_resolutions[res].setdefault('width', add_meta.get('width'))
                parsed_meta.update(known_resolutions.get(res, {}))
                add_meta.setdefault('height', int_or_none(res[:-1]))

            addr_formats = []
            for url in addr.get('url_list') or []:
                is_api_url = 'aweme/v1' in url
                addr_formats.append({
                    'url': url,
                    'filesize': int_or_none(addr.get('data_size')),
                    'ext': 'mp4',
                    'acodec': 'aac',
                    'source_preference': -2 if is_api_url else -1,  # Downloads from API might get blocked
                    **add_meta, **parsed_meta,
                    'format_note': ' '.join(filter(None, (
                        add_meta.get('format_note'), '(API)' if is_api_url else '')))
                })
            return addr_formats

        # Hack: Add direct video links first to prioritize them when removing duplicate formats
        formats = []
        if video_info.get('play_addr'):
            formats.extend(extract_addr(video_info['play_addr'], {
                'format_id': 'play_addr',
                'format_note': 'Direct video',
                'vcodec': 'h265' if traverse_obj(
                    video_info, 'is_bytevc1', 'is_h265') else 'h264',  # Always h264?
                'width': video_info.get('width'),
                'height': video_info.get('height'),
            }))
        if video_info.get('download_addr'):
            formats.extend(extract_addr(video_info['download_addr'], {
                'format_id': 'download_addr',
                'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
                'vcodec': 'h264',
                'width': video_info.get('width'),
                'height': video_info.get('height'),
                'preference': -2 if video_info.get('has_watermark') else -1,
            }))
        if video_info.get('play_addr_h264'):
            formats.extend(extract_addr(video_info['play_addr_h264'], {
                'format_id': 'play_addr_h264',
                'format_note': 'Direct video',
                'vcodec': 'h264',
            }))
        if video_info.get('play_addr_bytevc1'):
            formats.extend(extract_addr(video_info['play_addr_bytevc1'], {
                'format_id': 'play_addr_bytevc1',
                'format_note': 'Direct video',
                'vcodec': 'h265',
            }))

        # `or []` also covers an explicit null value, not just a missing key
        for bitrate in video_info.get('bit_rate') or []:
            if bitrate.get('play_addr'):
                formats.extend(extract_addr(bitrate['play_addr'], {
                    'format_id': bitrate.get('gear_name'),
                    'format_note': 'Playback video',
                    'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000),
                    'vcodec': 'h265' if traverse_obj(
                        bitrate, 'is_bytevc1', 'is_h265') else 'h264',
                    'fps': bitrate.get('FPS'),
                }))

        self._remove_duplicate_formats(formats)
        self._sort_formats(formats, ('quality', 'codec', 'size', 'br'))

        thumbnails = []
        for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
                         'origin_cover', 'dynamic_cover'):
            cover = video_info.get(cover_id)
            if not cover:
                continue
            # A cover without 'url_list' is skipped rather than raising KeyError
            for cover_url in cover.get('url_list') or []:
                thumbnails.append({
                    'id': cover_id,
                    'url': cover_url,
                })

        # `or {}` guards against keys that are present with a null value
        stats_info = aweme_detail.get('statistics') or {}
        author_info = aweme_detail.get('author') or {}
        music_info = aweme_detail.get('music') or {}
        uploader_key = traverse_obj(
            author_info, 'sec_uid', 'id', 'uid', 'unique_id',
            expected_type=str_or_none, get_all=False)
        # Without any author identifier there is no URL to build (avoids '...@None')
        user_url = self._UPLOADER_URL_FORMAT % uploader_key if uploader_key else None

        contained_music_track = traverse_obj(
            music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str)
        contained_music_author = traverse_obj(
            music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str)

        # An auto-generated "original sound - <handle>" title is replaced by the
        # matched song/sound metadata when that is available
        is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle')
        if is_generic_og_trackname:
            music_track, music_author = contained_music_track or 'original sound', contained_music_author
        else:
            music_track, music_author = music_info.get('title'), music_info.get('author')

        return {
            'id': aweme_id,
            'title': aweme_detail['desc'],
            'description': aweme_detail['desc'],
            'view_count': int_or_none(stats_info.get('play_count')),
            'like_count': int_or_none(stats_info.get('digg_count')),
            'repost_count': int_or_none(stats_info.get('share_count')),
            'comment_count': int_or_none(stats_info.get('comment_count')),
            'uploader': str_or_none(author_info.get('unique_id')),
            'creator': str_or_none(author_info.get('nickname')),
            'uploader_id': str_or_none(author_info.get('uid')),
            'uploader_url': user_url,
            'track': music_track,
            'album': str_or_none(music_info.get('album')) or None,
            'artist': music_author,
            'timestamp': int_or_none(aweme_detail.get('create_time')),
            'formats': formats,
            'thumbnails': thumbnails,
            'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000)
        }

    def _parse_aweme_video_web(self, aweme_detail, webpage_url):
        """Build an info dict from the item structure embedded in a web page.

        ``aweme_detail`` must contain ``video`` (``KeyError`` otherwise).
        ``webpage_url`` is attached as the ``Referer`` header for the download.
        """
        video_info = aweme_detail['video']
        author_info = traverse_obj(aweme_detail, 'author', 'authorInfo', default={})
        music_info = aweme_detail.get('music') or {}
        stats_info = aweme_detail.get('stats') or {}
        uploader_key = traverse_obj(
            author_info, 'secUid', 'id', 'uid', 'uniqueId',
            expected_type=str_or_none, get_all=False)
        # Without any author identifier there is no URL to build (avoids '...@None')
        user_url = self._UPLOADER_URL_FORMAT % uploader_key if uploader_key else None

        formats = []
        play_url = video_info.get('playAddr')
        width = video_info.get('width')
        height = video_info.get('height')
        # 'playAddr' is handled both as a single URL string and as a list of
        # objects that carry the URL under 'src'
        if isinstance(play_url, str):
            formats = [{
                'url': self._proto_relative_url(play_url),
                'ext': 'mp4',
                'width': width,
                'height': height,
            }]
        elif isinstance(play_url, list):
            formats = [{
                'url': self._proto_relative_url(url),
                'ext': 'mp4',
                'width': width,
                'height': height,
            } for url in traverse_obj(play_url, (..., 'src'), expected_type=url_or_none, default=[]) if url]

        download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none)
        if download_url:
            formats.append({
                'format_id': 'download',
                'url': self._proto_relative_url(download_url),
                'ext': 'mp4',
                'width': width,
                'height': height,
            })
        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)

        # Collect every available cover instead of keeping only the last one found.
        # 'preference' rises with the position in the tuple, so the entry that used
        # to be the sole survivor still ranks highest.
        thumbnails = []
        for preference, thumbnail_name in enumerate(('thumbnail', 'cover', 'dynamicCover', 'originCover')):
            thumbnail_url = aweme_detail.get(thumbnail_name)
            if thumbnail_url:
                thumbnails.append({
                    'id': thumbnail_name,
                    'url': self._proto_relative_url(thumbnail_url),
                    'preference': preference,
                    'width': width,
                    'height': height
                })

        return {
            'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none),
            'title': aweme_detail.get('desc'),
            'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int),
            'view_count': int_or_none(stats_info.get('playCount')),
            'like_count': int_or_none(stats_info.get('diggCount')),
            'repost_count': int_or_none(stats_info.get('shareCount')),
            'comment_count': int_or_none(stats_info.get('commentCount')),
            'timestamp': int_or_none(aweme_detail.get('createTime')),
            'creator': str_or_none(author_info.get('nickname')),
            'uploader': str_or_none(author_info.get('uniqueId')),
            'uploader_id': str_or_none(author_info.get('id')),
            'uploader_url': user_url,
            'track': str_or_none(music_info.get('title')),
            'album': str_or_none(music_info.get('album')) or None,
            'artist': str_or_none(music_info.get('authorName')),
            'formats': formats,
            'thumbnails': thumbnails,
            'description': str_or_none(aweme_detail.get('desc')),
            'http_headers': {
                'Referer': webpage_url
            }
        }
283
0fd6661e
M
284
class TikTokIE(TikTokBaseIE):
    """Extractor for single TikTok video pages.

    The app API is tried first; if it raises ``ExtractorError`` the JSON
    embedded in the video web page is used instead.
    """
    _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
        'md5': '736bb7a466c6f0a6afeb597da1e6f5b7',
        'info_dict': {
            'id': '6748451240264420610',
            'ext': 'mp4',
            'title': '#jassmanak #lehanga #leenabhushan',
            'description': '#jassmanak #lehanga #leenabhushan',
            'duration': 13,
            'height': 1024,
            'width': 576,
            'uploader': 'leenabhushan',
            'uploader_id': '6691488002098119685',
            'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA_Eb4t1vodM1IuTy_cvp9CY22RAb59xqrO0Xtz9CYQJvgXaDvZxYnZYRzDWhhgJmy',
            'creator': 'facestoriesbyleenabh',
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'upload_date': '20191016',
            'timestamp': 1571246252,
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
        'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
        'info_dict': {
            'id': '6742501081818877190',
            'ext': 'mp4',
            'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'duration': 27,
            'height': 960,
            'width': 540,
            'uploader': 'patrox',
            'uploader_id': '18702747',
            'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
            'creator': 'patroX',
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'upload_date': '20190930',
            'timestamp': 1569860870,
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        # Promoted content/ad
        'url': 'https://www.tiktok.com/@MS4wLjABAAAAAR29F6J2Ktu0Daw03BJyXPNoRQ-W7U5a0Mn3lVCq2rQhjOd_WNLclHUoFgwX8Eno/video/6932675057474981122',
        'only_matching': True,
    }]

    def _extract_aweme_app(self, aweme_id):
        """Fetch ``aweme/detail`` for ``aweme_id`` from the app API and parse it."""
        aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
                                      note='Downloading video details', errnote='Unable to download video details')['aweme_detail']
        return self._parse_aweme_video_app(aweme_detail)

    def _real_extract(self, url):
        """Return the info dict for the video at ``url``.

        Raises ``ExtractorError`` when the page reports the video as private or
        when no usable item data can be found.
        """
        video_id = self._match_id(url)

        try:
            return self._extract_aweme_app(video_id)
        except ExtractorError as e:
            self.report_warning(f'{e}; Retrying with webpage')

        # If we only call once, we get a 403 when downloading the video.
        self._download_webpage(url, video_id)
        webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
        json_string = self._search_regex(
            r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
            webpage, 'json_string', group='json_string_ld')
        json_data = self._parse_json(json_string, video_id)
        # Missing or malformed 'props'/'pageProps' falls through to the generic
        # "not available" error below instead of raising AttributeError
        page_props = try_get(json_data, lambda x: x['props']['pageProps'], expected_type=dict) or {}

        # Check statusCode for success
        status = page_props.get('statusCode')
        if status == 0:
            item_struct = try_get(page_props, lambda x: x['itemInfo']['itemStruct'], expected_type=dict)
            if item_struct:
                return self._parse_aweme_video_web(item_struct, url)
        elif status == 10216:
            raise ExtractorError('This video is private', expected=True)

        raise ExtractorError('Video not available', video_id=video_id)
f7f18f90
A
370
371
class TikTokUserIE(TikTokBaseIE):
    """Extractor that turns a TikTok user profile URL into a playlist of that user's videos."""
    IE_NAME = 'tiktok:user'
    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://tiktok.com/@corgibobaa?lang=en',
        'playlist_mincount': 45,
        'info_dict': {
            'id': '6935371178089399301',
            'title': 'corgibobaa',
        },
        'expected_warnings': ['Retrying']
    }, {
        'url': 'https://www.tiktok.com/@meme',
        'playlist_mincount': 593,
        'info_dict': {
            'id': '79005827461758976',
            'title': 'meme',
        },
        'expected_warnings': ['Retrying']
    }]

    r'''  # TODO: Fix by adding _signature to api_url
    def _entries(self, webpage, user_id, username):
        secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username)
        verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
        if not verifyfp_cookie:
            raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
        api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
        cursor = '0'
        for page in itertools.count():
            data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page)
            for video in data_json.get('itemList', []):
                video_id = video['id']
                video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
                yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc')))
            if not data_json.get('hasMore'):
                break
            cursor = data_json['cursor']
    '''

    def _entries_api(self, webpage, user_id, username):
        """Yield an info dict for every video returned by the ``aweme/post`` endpoint.

        Pages are requested until the response no longer reports ``has_more``.
        A request whose response fails JSON decoding at position 0 is retried up
        to the ``extractor_retries`` parameter (default 3); any other
        ``ExtractorError`` propagates. ``webpage`` is accepted but not used.
        """
        query = {
            'user_id': user_id,
            'count': 21,
            'max_cursor': 0,
            'min_cursor': 0,
            'retry_type': 'no_retry',
            'device_id': ''.join(random.choice(string.digits) for _ in range(19)),  # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
        }

        max_retries = self.get_param('extractor_retries', 3)
        for page in itertools.count(1):
            for retries in itertools.count():
                try:
                    post_list = self._call_api('aweme/post', query, username,
                                               note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
                                               errnote='Unable to download user video list')
                except ExtractorError as e:
                    if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
                        self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
                        continue
                    raise
                break
            # `or []` also covers an explicit null 'aweme_list', not just a missing key
            for video in post_list.get('aweme_list') or []:
                yield {
                    **self._parse_aweme_video_app(video),
                    'ie_key': TikTokIE.ie_key(),
                    'extractor': 'TikTok',
                }
            if not post_list.get('has_more'):
                break
            query['max_cursor'] = post_list['max_cursor']

    def _real_extract(self, url):
        """Resolve the numeric user ID from the profile page and return the playlist."""
        user_name = self._match_id(url)
        webpage = self._download_webpage(url, user_name, headers={
            'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
        })
        user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID')
        return self.playlist_result(self._entries_api(webpage, user_id, user_name), user_id, user_name)
943d5ab1
M
452
453
class DouyinIE(TikTokIE):
    """Extractor for single Douyin video pages.

    Reuses the app-API path of ``TikTokIE`` with Douyin-specific app constants
    and falls back to the URL-encoded ``RENDER_DATA`` JSON in the web page.
    """
    _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.douyin.com/video/6961737553342991651',
        'md5': '10523312c8b8100f353620ac9dc8f067',
        'info_dict': {
            'id': '6961737553342991651',
            'ext': 'mp4',
            'title': '#杨超越 小小水手带你去远航❤️',
            'uploader': '杨超越',
            'upload_date': '20210513',
            'timestamp': 1620905839,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6982497745948921092',
        'md5': 'd78408c984b9b5102904cf6b6bc2d712',
        'info_dict': {
            'id': '6982497745948921092',
            'ext': 'mp4',
            'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
            'uploader': '杨超越工作室',
            'upload_date': '20210708',
            'timestamp': 1625739481,
            'uploader_id': '408654318141572',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6953975910773099811',
        'md5': '72e882e24f75064c218b76c8b713c185',
        'info_dict': {
            'id': '6953975910773099811',
            'ext': 'mp4',
            'title': '#一起看海 出现在你的夏日里',
            'uploader': '杨超越',
            'upload_date': '20210422',
            'timestamp': 1619098692,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6950251282489675042',
        'md5': 'b4db86aec367ef810ddd38b1737d2fed',
        'info_dict': {
            'id': '6950251282489675042',
            'ext': 'mp4',
            'title': '哈哈哈,成功了哈哈哈哈哈哈',
            'uploader': '杨超越',
            'upload_date': '20210412',
            'timestamp': 1618231483,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6963263655114722595',
        'md5': '1abe1c477d05ee62efb40bf2329957cf',
        'info_dict': {
            'id': '6963263655114722595',
            'ext': 'mp4',
            'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
            'uploader': '杨超越',
            'upload_date': '20210517',
            'timestamp': 1621261163,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }]
    _APP_VERSION = '9.6.0'
    _MANIFEST_APP_VERSION = '960'
    _APP_NAME = 'aweme'
    _AID = 1128
    _API_HOSTNAME = 'aweme.snssdk.com'
    _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'

    def _real_extract(self, url):
        """Return the info dict for the Douyin video at ``url``.

        Raises ``ExtractorError`` when the page carries no render data or the
        render data contains no video detail object.
        """
        video_id = self._match_id(url)

        try:
            return self._extract_aweme_app(video_id)
        except ExtractorError as e:
            self.report_warning(f'{e}; Retrying with webpage')

        webpage = self._download_webpage(url, video_id)
        render_data_json = self._search_regex(
            r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
            webpage, 'render data', default=None)
        if not render_data_json:
            # TODO: Run verification challenge code to generate signature cookies
            raise ExtractorError('Fresh cookies (not necessarily logged in) are needed')

        # The script payload is percent-encoded JSON, hence the unquote transform
        render_data = self._parse_json(
            render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
        aweme_detail = traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False)
        if not aweme_detail:
            # Report a proper extraction failure instead of crashing with a
            # TypeError when the parser is handed None
            raise ExtractorError('Unable to find video details in the render data', video_id=video_id)
        return self._parse_aweme_video_web(aweme_detail, url)