]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/tiktok.py
[Douyin] Rewrite extractor (#1157)
[yt-dlp.git] / yt_dlp / extractor / tiktok.py
CommitLineData
1ead840d
KS
1# coding: utf-8
2from __future__ import unicode_literals
f7f18f90
A
3
4import itertools
bd9ff55b
M
5import random
6import string
7import time
0fd6661e 8import json
1ead840d
KS
9
10from .common import InfoExtractor
943d5ab1 11from ..compat import compat_urllib_parse_unquote
1ead840d 12from ..utils import (
ce18a19b 13 ExtractorError,
1ead840d
KS
14 int_or_none,
15 str_or_none,
bd9ff55b
M
16 traverse_obj,
17 try_get,
943d5ab1 18 url_or_none,
bd9ff55b 19 qualities,
1ead840d
KS
20)
21
22
class TikTokBaseIE(InfoExtractor):
    """Common base for the TikTok-family extractors in this module.

    Provides the app-API request helper (_call_api) and two parsers that turn
    an "aweme" (video) object -- taken either from the app API or from the
    JSON embedded in the web page -- into an info dict. Subclasses override
    the class attributes below to target a different app and API host.
    """
    # App identification sent with every API request (see _call_api)
    _APP_VERSION = '20.9.3'
    _MANIFEST_APP_VERSION = '291'
    _APP_NAME = 'trill'
    _AID = 1180
    _API_HOSTNAME = 'api-t2.tiktokv.com'
    # Template for 'uploader_url'; receives the first available author identifier
    _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
    # Resolution labels passed to qualities() to rank formats
    QUALITIES = ('360p', '540p', '720p')

    def _call_api(self, ep, query, video_id, fatal=True,
                  note='Downloading API JSON', errnote='Unable to download API page'):
        """Request https://<_API_HOSTNAME>/aweme/v1/<ep>/ and return the parsed JSON.

        ep is the endpoint path below /aweme/v1/. query holds the
        endpoint-specific parameters; the device/app parameters assembled here
        are merged after it, so they win on key clashes. video_id, fatal,
        note and errnote are passed through to _download_json.
        """
        real_query = {
            **query,
            'version_name': self._APP_VERSION,
            'version_code': self._MANIFEST_APP_VERSION,
            'build_number': self._APP_VERSION,
            'manifest_version_code': self._MANIFEST_APP_VERSION,
            'update_version_code': self._MANIFEST_APP_VERSION,
            'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)),
            'uuid': ''.join(random.choice(string.digits) for _ in range(16)),
            '_rticket': int(time.time() * 1000),
            'ts': int(time.time()),
            'device_brand': 'Google',
            'device_type': 'Pixel 4',
            'device_platform': 'android',
            'resolution': '1080*1920',
            'dpi': 420,
            'os_version': '10',
            'os_api': '29',
            'carrier_region': 'US',
            'sys_region': 'US',
            'region': 'US',
            'app_name': self._APP_NAME,
            'app_language': 'en',
            'language': 'en',
            'timezone_name': 'America/New_York',
            'timezone_offset': '-14400',
            'channel': 'googleplay',
            'ac': 'wifi',
            'mcc_mnc': '310260',
            'is_my_cn': 0,
            'aid': self._AID,
            'ssmix': 'a',
            'as': 'a1qwert123',
            'cp': 'cbfhckdckkde1',
        }
        # A fresh random 160-hex-digit 'odin_tt' cookie is set before every request
        self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
        # NOTE(review): the User-Agent hard-codes the 'trill' package name even when a
        # subclass overrides _APP_NAME -- confirm whether that is intentional
        return self._download_json(
            'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
            fatal=fatal, note=note, errnote=errnote, headers={
                'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
                'Accept': 'application/json',
            }, query=real_query)

    def _parse_aweme_video_app(self, aweme_detail):
        """Build an info dict from an aweme object in the app-API layout.

        aweme_detail must contain 'aweme_id', 'video' and 'desc'; every other
        field is optional and results in a None/empty value when missing.
        """
        aweme_id = aweme_detail['aweme_id']
        video_info = aweme_detail['video']

        def parse_url_key(url_key):
            # Returns (format metadata, resolution label), or ({}, None) when the
            # key does not match "v<...>_<codec>_<N>p_<bitrate>"
            format_id, codec, res, bitrate = self._search_regex(
                r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
                'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
            if not format_id:
                return {}, None
            return {
                'format_id': format_id,
                'vcodec': 'h265' if codec == 'bytevc1' else codec,
                'tbr': int_or_none(bitrate, scale=1000) or None,
                'quality': qualities(self.QUALITIES)(res),
            }, res

        # Resolution label -> width/height recorded by the first address seen with
        # that label; reused for later addresses sharing the label
        known_resolutions = {}

        def extract_addr(addr, add_meta=None):
            # Work on a private copy: the metadata is amended below and must neither
            # leak into the caller's dict nor persist in a shared default argument
            add_meta = dict(add_meta or {})
            parsed_meta, res = parse_url_key(addr.get('url_key') or '')
            if res:
                dimensions = known_resolutions.setdefault(res, {})
                dimensions.setdefault('height', add_meta.get('height'))
                dimensions.setdefault('width', add_meta.get('width'))
                parsed_meta.update(dimensions)
                # Fall back to the number in the label ("720p" -> 720)
                add_meta.setdefault('height', int_or_none(res[:-1]))
            return [{
                'url': url,
                'filesize': int_or_none(addr.get('data_size')),
                'ext': 'mp4',
                'acodec': 'aac',
                'source_preference': -2 if 'aweme/v1' in url else -1,  # Downloads from API might get blocked
                **add_meta, **parsed_meta,
                'format_note': ' '.join(filter(None, (
                    add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else '')))
            } for url in addr.get('url_list') or []]

        # Hack: Add direct video links first to prioritize them when removing duplicate formats
        formats = []
        if video_info.get('play_addr'):
            formats.extend(extract_addr(video_info['play_addr'], {
                'format_id': 'play_addr',
                'format_note': 'Direct video',
                'vcodec': 'h265' if traverse_obj(
                    video_info, 'is_bytevc1', 'is_h265') else 'h264',  # Always h264?
                'width': video_info.get('width'),
                'height': video_info.get('height'),
            }))
        if video_info.get('download_addr'):
            formats.extend(extract_addr(video_info['download_addr'], {
                'format_id': 'download_addr',
                'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
                'vcodec': 'h264',
                'width': video_info.get('width'),
                'height': video_info.get('height'),
                'preference': -2 if video_info.get('has_watermark') else -1,
            }))
        if video_info.get('play_addr_h264'):
            formats.extend(extract_addr(video_info['play_addr_h264'], {
                'format_id': 'play_addr_h264',
                'format_note': 'Direct video',
                'vcodec': 'h264',
            }))
        if video_info.get('play_addr_bytevc1'):
            formats.extend(extract_addr(video_info['play_addr_bytevc1'], {
                'format_id': 'play_addr_bytevc1',
                'format_note': 'Direct video',
                'vcodec': 'h265',
            }))

        # 'bit_rate' may be absent or null
        for bitrate in video_info.get('bit_rate') or []:
            if bitrate.get('play_addr'):
                formats.extend(extract_addr(bitrate['play_addr'], {
                    'format_id': bitrate.get('gear_name'),
                    'format_note': 'Playback video',
                    'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000),
                    'vcodec': 'h265' if traverse_obj(
                        bitrate, 'is_bytevc1', 'is_h265') else 'h264',
                    'fps': bitrate.get('FPS'),
                }))

        self._remove_duplicate_formats(formats)
        self._sort_formats(formats, ('quality', 'codec', 'size', 'br'))

        thumbnails = []
        for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
                         'origin_cover', 'dynamic_cover'):
            cover = video_info.get(cover_id)
            if cover:
                # A cover entry without 'url_list' contributes no thumbnails
                for cover_url in cover.get('url_list') or []:
                    thumbnails.append({
                        'id': cover_id,
                        'url': cover_url,
                    })

        # "or {}" also covers keys that are present but null
        stats_info = aweme_detail.get('statistics') or {}
        author_info = aweme_detail.get('author') or {}
        music_info = aweme_detail.get('music') or {}
        # Only build the URL when an identifier exists, to avoid ".../@None"
        uploader_key = traverse_obj(
            author_info, 'sec_uid', 'id', 'uid', 'unique_id',
            expected_type=str_or_none, get_all=False)
        user_url = self._UPLOADER_URL_FORMAT % uploader_key if uploader_key else None

        contained_music_track = traverse_obj(
            music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str)
        contained_music_author = traverse_obj(
            music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str)

        # When the title is just the auto-generated "original sound - <handle>",
        # prefer the matched song/sound metadata over the generic title
        is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle')
        if is_generic_og_trackname:
            music_track, music_author = contained_music_track or 'original sound', contained_music_author
        else:
            music_track, music_author = music_info.get('title'), music_info.get('author')

        return {
            'id': aweme_id,
            'title': aweme_detail['desc'],
            'description': aweme_detail['desc'],
            'view_count': int_or_none(stats_info.get('play_count')),
            'like_count': int_or_none(stats_info.get('digg_count')),
            'repost_count': int_or_none(stats_info.get('share_count')),
            'comment_count': int_or_none(stats_info.get('comment_count')),
            'uploader': str_or_none(author_info.get('unique_id')),
            'creator': str_or_none(author_info.get('nickname')),
            'uploader_id': str_or_none(author_info.get('uid')),
            'uploader_url': user_url,
            'track': music_track,
            'album': str_or_none(music_info.get('album')) or None,
            'artist': music_author,
            'timestamp': int_or_none(aweme_detail.get('create_time')),
            'formats': formats,
            'thumbnails': thumbnails,
            'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000)
        }

    def _parse_aweme_video_web(self, aweme_detail, webpage, url):
        """Build an info dict from an aweme object in the web-page JSON layout.

        aweme_detail must contain 'video'. webpage is accepted but not used by
        this method. url is sent as the Referer header for the returned formats.
        """
        video_info = aweme_detail['video']
        author_info = traverse_obj(aweme_detail, 'author', 'authorInfo', default={})
        music_info = aweme_detail.get('music') or {}
        stats_info = aweme_detail.get('stats') or {}
        # Only build the URL when an identifier exists, to avoid ".../@None"
        uploader_key = traverse_obj(
            author_info, 'secUid', 'id', 'uid', 'uniqueId',
            expected_type=str_or_none, get_all=False)
        user_url = self._UPLOADER_URL_FORMAT % uploader_key if uploader_key else None

        formats = []
        play_url = video_info.get('playAddr')
        width = video_info.get('width')
        height = video_info.get('height')
        # 'playAddr' is handled both as a single URL string and as a list of
        # objects carrying the URL under 'src'
        if isinstance(play_url, str):
            formats = [{
                'url': self._proto_relative_url(play_url),
                'ext': 'mp4',
                'width': width,
                'height': height,
            }]
        elif isinstance(play_url, list):
            formats = [{
                'url': self._proto_relative_url(src_url),
                'ext': 'mp4',
                'width': width,
                'height': height,
            } for src_url in traverse_obj(play_url, (..., 'src'), expected_type=url_or_none, default=[]) if src_url]

        download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none)
        if download_url:
            formats.append({
                'format_id': 'download',
                'url': self._proto_relative_url(download_url),
                'ext': 'mp4',
                'width': width,
                'height': height,
            })
        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)

        # NOTE(review): each hit replaces the list, so only the last available name
        # is kept -- confirm whether all of them were meant to be collected
        thumbnails = []
        for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
            if aweme_detail.get(thumbnail_name):
                thumbnails = [{
                    'url': self._proto_relative_url(aweme_detail[thumbnail_name]),
                    'width': width,
                    'height': height
                }]

        return {
            'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none),
            'title': aweme_detail.get('desc'),
            'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int),
            'view_count': int_or_none(stats_info.get('playCount')),
            'like_count': int_or_none(stats_info.get('diggCount')),
            'repost_count': int_or_none(stats_info.get('shareCount')),
            'comment_count': int_or_none(stats_info.get('commentCount')),
            'timestamp': int_or_none(aweme_detail.get('createTime')),
            'creator': str_or_none(author_info.get('nickname')),
            'uploader': str_or_none(author_info.get('uniqueId')),
            'uploader_id': str_or_none(author_info.get('id')),
            'uploader_url': user_url,
            'track': str_or_none(music_info.get('title')),
            'album': str_or_none(music_info.get('album')) or None,
            'artist': str_or_none(music_info.get('authorName')),
            'formats': formats,
            'thumbnails': thumbnails,
            'description': str_or_none(aweme_detail.get('desc')),
            'http_headers': {
                'Referer': url
            }
        }
283
0fd6661e
M
284
class TikTokIE(TikTokBaseIE):
    """Extractor for single TikTok video pages (https://www.tiktok.com/@<user>/video/<id>).

    The app API is tried first; on an ExtractorError the JSON embedded in the
    web page is used instead.
    """
    _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
        'md5': '34a7543afd5a151b0840ba6736fb633b',
        'info_dict': {
            'id': '6748451240264420610',
            'ext': 'mp4',
            'title': '#jassmanak #lehanga #leenabhushan',
            'description': '#jassmanak #lehanga #leenabhushan',
            'duration': 13,
            'height': 1280,
            'width': 720,
            'uploader': 'leenabhushan',
            'uploader_id': '6691488002098119685',
            'uploader_url': 'https://www.tiktok.com/@leenabhushan',
            'creator': 'facestoriesbyleenabh',
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'upload_date': '20191016',
            'timestamp': 1571246252,
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
        'md5': '06b9800d47d5fe51a19e322dd86e61c9',
        'info_dict': {
            'id': '6742501081818877190',
            'ext': 'mp4',
            'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'duration': 27,
            'height': 960,
            'width': 540,
            'uploader': 'patrox',
            'uploader_id': '18702747',
            'uploader_url': 'https://www.tiktok.com/@patrox',
            'creator': 'patroX',
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'upload_date': '20190930',
            'timestamp': 1569860870,
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        # Promoted content/ad
        'url': 'https://www.tiktok.com/@MS4wLjABAAAAAR29F6J2Ktu0Daw03BJyXPNoRQ-W7U5a0Mn3lVCq2rQhjOd_WNLclHUoFgwX8Eno/video/6932675057474981122',
        'only_matching': True,
    }]

    def _extract_aweme_app(self, aweme_id):
        """Fetch the video through the app API and return its info dict.

        Raises ExtractorError when the response carries no 'aweme_detail', so
        that callers catching ExtractorError can fall back to the web page.
        """
        aweme_detail = self._call_api(
            'aweme/detail', {'aweme_id': aweme_id}, aweme_id,
            note='Downloading video details', errnote='Unable to download video details').get('aweme_detail')
        if not aweme_detail:
            # Indexing the key directly would raise KeyError and skip the fallback
            raise ExtractorError('Video not available', video_id=aweme_id)
        return self._parse_aweme_video_app(aweme_detail)

    def _real_extract(self, url):
        video_id = self._match_id(url)

        try:
            return self._extract_aweme_app(video_id)
        except ExtractorError as e:
            self.report_warning(f'{e}; Retrying with webpage')

        # If we only call once, we get a 403 when downloading the video.
        self._download_webpage(url, video_id)
        webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
        json_string = self._search_regex(
            r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
            webpage, 'json_string', group='json_string_ld')
        json_data = self._parse_json(json_string, video_id)
        # Tolerate a missing 'props'/'pageProps' level instead of raising AttributeError
        page_props = traverse_obj(json_data, ('props', 'pageProps'), expected_type=dict) or {}

        # Check statusCode for success
        status = page_props.get('statusCode')
        if status == 0:
            return self._parse_aweme_video_web(page_props['itemInfo']['itemStruct'], webpage, url)
        elif status == 10216:
            raise ExtractorError('This video is private', expected=True)

        raise ExtractorError('Video not available', video_id=video_id)
f7f18f90
A
370
371
class TikTokUserIE(TikTokBaseIE):
    """Extractor for TikTok user pages; yields the user's videos as a playlist."""
    IE_NAME = 'tiktok:user'
    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://tiktok.com/@corgibobaa?lang=en',
        'playlist_mincount': 45,
        'info_dict': {
            'id': '6935371178089399301',
        },
    }, {
        'url': 'https://www.tiktok.com/@meme',
        'playlist_mincount': 593,
        'info_dict': {
            'id': '79005827461758976',
        },
    }]

    # The web-API implementation below is disabled by keeping it inside a string
    r'''  # TODO: Fix by adding _signature to api_url
    def _entries(self, webpage, user_id, username):
        secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username)
        verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
        if not verifyfp_cookie:
            raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
        api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
        cursor = '0'
        for page in itertools.count():
            data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page)
            for video in data_json.get('itemList', []):
                video_id = video['id']
                video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
                yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc')))
            if not data_json.get('hasMore'):
                break
            cursor = data_json['cursor']
    '''

    def _entries_api(self, webpage, user_id, username):
        """Yield info dicts for the user's videos, paging through 'aweme/post'.

        webpage is accepted but not used. user_id is sent as the 'user_id'
        query parameter; username is only used as the ID for progress and
        error messages. A page is retried up to the 'extractor_retries'
        setting (default 3) when the response fails to decode as JSON at
        position 0; any other error is re-raised.
        """
        query = {
            'user_id': user_id,
            'count': 21,
            'max_cursor': 0,
            'min_cursor': 0,
            'retry_type': 'no_retry',
            'device_id': ''.join(random.choice(string.digits) for _ in range(19)),  # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
        }

        max_retries = self.get_param('extractor_retries', 3)
        for page in itertools.count(1):
            for retries in itertools.count():
                try:
                    post_list = self._call_api(
                        'aweme/post', query, username,
                        note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
                        errnote='Unable to download user video list')
                except ExtractorError as e:
                    if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
                        self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
                        continue
                    raise
                break
            # 'aweme_list' may be absent or null
            for video in post_list.get('aweme_list') or []:
                yield {
                    # The app-API parser is named _parse_aweme_video_app; the
                    # previous _parse_aweme_video does not exist on this class
                    **self._parse_aweme_video_app(video),
                    'ie_key': TikTokIE.ie_key(),
                    'extractor': 'TikTok',
                }
            if not post_list.get('has_more'):
                break
            query['max_cursor'] = post_list['max_cursor']

    def _real_extract(self, url):
        user_id = self._match_id(url)
        webpage = self._download_webpage(url, user_id, headers={
            'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
        })
        # The numeric ID needed by the API is taken from an app deep link in the page
        own_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID')
        return self.playlist_result(self._entries_api(webpage, own_id, user_id), user_id)
943d5ab1
M
448
449
class DouyinIE(TikTokIE):
    """Extractor for Douyin video pages (https://www.douyin.com/video/<id>).

    Reuses the TikTok app-API path with Douyin-specific app constants and
    falls back to the URL-encoded RENDER_DATA JSON embedded in the web page.
    """
    _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.douyin.com/video/6961737553342991651',
        'md5': '10523312c8b8100f353620ac9dc8f067',
        'info_dict': {
            'id': '6961737553342991651',
            'ext': 'mp4',
            'title': '#杨超越 小小水手带你去远航❤️',
            'uploader': '杨超越',
            'upload_date': '20210513',
            'timestamp': 1620905839,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6982497745948921092',
        'md5': 'd78408c984b9b5102904cf6b6bc2d712',
        'info_dict': {
            'id': '6982497745948921092',
            'ext': 'mp4',
            'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
            'uploader': '杨超越工作室',
            'upload_date': '20210708',
            'timestamp': 1625739481,
            'uploader_id': '408654318141572',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6953975910773099811',
        'md5': '72e882e24f75064c218b76c8b713c185',
        'info_dict': {
            'id': '6953975910773099811',
            'ext': 'mp4',
            'title': '#一起看海 出现在你的夏日里',
            'uploader': '杨超越',
            'upload_date': '20210422',
            'timestamp': 1619098692,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6950251282489675042',
        'md5': 'b4db86aec367ef810ddd38b1737d2fed',
        'info_dict': {
            'id': '6950251282489675042',
            'ext': 'mp4',
            'title': '哈哈哈,成功了哈哈哈哈哈哈',
            'uploader': '杨超越',
            'upload_date': '20210412',
            'timestamp': 1618231483,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6963263655114722595',
        'md5': '1abe1c477d05ee62efb40bf2329957cf',
        'info_dict': {
            'id': '6963263655114722595',
            'ext': 'mp4',
            'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
            'uploader': '杨超越',
            'upload_date': '20210517',
            'timestamp': 1621261163,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }]
    # Douyin-specific overrides of the app/API constants used by _call_api
    _APP_VERSION = '9.6.0'
    _MANIFEST_APP_VERSION = '960'
    _APP_NAME = 'aweme'
    _AID = 1128
    _API_HOSTNAME = 'aweme.snssdk.com'
    _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'

    def _real_extract(self, url):
        video_id = self._match_id(url)

        try:
            return self._extract_aweme_app(video_id)
        except ExtractorError as e:
            self.report_warning(f'{e}; Retrying with webpage')

        webpage = self._download_webpage(url, video_id)
        # The payload is URL-encoded JSON, hence the %7B...%7D ("{...}") delimiters
        render_data_json = self._search_regex(
            r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
            webpage, 'render data', default=None)
        if not render_data_json:
            # TODO: Run verification challenge code to generate signature cookies
            # expected=True: the user has to supply cookies; this is not an extractor bug
            raise ExtractorError('Fresh cookies (not necessarily logged in) are needed', expected=True)

        render_data = self._parse_json(
            render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
        aweme_detail = traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False)
        if not aweme_detail:
            # Fail with a clear error rather than a TypeError inside the parser
            raise ExtractorError('Unable to find video details in render data', video_id=video_id)
        return self._parse_aweme_video_web(aweme_detail, webpage, url)