)
from ..utils import (
ExtractorError,
+ HEADRequest,
+ get_first,
int_or_none,
join_nonempty,
LazyList,
class TikTokBaseIE(InfoExtractor):
- _APP_VERSION = '20.1.0'
- _MANIFEST_APP_VERSION = '210'
+ _APP_VERSIONS = [('20.9.3', '293'), ('20.4.3', '243'), ('20.2.1', '221'), ('20.1.2', '212'), ('20.0.4', '204')]
+ _WORKING_APP_VERSION = None
_APP_NAME = 'trill'
_AID = 1180
_API_HOSTNAME = 'api-h2.tiktokv.com'
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
- def _call_api(self, ep, query, video_id, fatal=True,
- note='Downloading API JSON', errnote='Unable to download API page'):
- real_query = {
+ def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
+ note='Downloading API JSON', errnote='Unable to download API page'):
+ # Send a single request to https://<_API_HOSTNAME>/aweme/v1/<ep>/ and return
+ # whatever _download_json returns. `query` is passed through unchanged;
+ # `manifest_app_version` is used here only to build the User-Agent header.
+ # A new random 160-character hex 'odin_tt' cookie is set on the API host for every call.
+ self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
+ webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
+ # Mirror the webpage host's 'sid_tt' cookie onto the API host when one exists
+ # (presumably the logged-in session cookie - TODO confirm).
+ if webpage_cookies.get('sid_tt'):
+ self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value)
+ return self._download_json(
+ 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
+ fatal=fatal, note=note, errnote=errnote, headers={
+ 'User-Agent': f'com.ss.android.ugc.trill/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
+ 'Accept': 'application/json',
+ }, query=query)
+
+ def _build_api_query(self, query, app_version, manifest_app_version):
+ # Return a new dict made of the caller's `query` plus the version fields and
+ # the per-call random/timestamp values below. Keys written after `**query`
+ # take precedence over same-named keys supplied by the caller.
+ # NOTE(review): unchanged keys of this dict may be elided from this diff view.
+ return {
**query,
- 'version_name': self._APP_VERSION,
- 'version_code': self._MANIFEST_APP_VERSION,
- 'build_number': self._APP_VERSION,
- 'manifest_version_code': self._MANIFEST_APP_VERSION,
- 'update_version_code': self._MANIFEST_APP_VERSION,
+ 'version_name': app_version,
+ 'version_code': manifest_app_version,
+ 'build_number': app_version,
+ 'manifest_version_code': manifest_app_version,
+ 'update_version_code': manifest_app_version,
'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)),
'uuid': ''.join([random.choice(string.digits) for _ in range(16)]),
'_rticket': int(time.time() * 1000),
'as': 'a1qwert123',
'cp': 'cbfhckdckkde1',
}
- self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
- webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
- if webpage_cookies.get('sid_tt'):
- self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value)
- return self._download_json(
- 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
- fatal=fatal, note=note, errnote=errnote, headers={
- 'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
- 'Accept': 'application/json',
- }, query=real_query)
+
+ def _call_api(self, ep, query, video_id, fatal=True,
+ note='Downloading API JSON', errnote='Unable to download API page'):
+ # Call API endpoint `ep` with an (app_version, manifest_app_version) pair chosen as follows:
+ # 1. self._WORKING_APP_VERSION, when already set;
+ # 2. the 'app_version' / 'manifest_app_version' extractor arguments (looked up
+ # under TikTokIE's key), when both are given;
+ # 3. otherwise each pair of self._APP_VERSIONS in order, remembering the first
+ # pair whose request returns without raising.
+ if not self._WORKING_APP_VERSION:
+ app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0]
+ manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0]
+ if app_version and manifest_app_version:
+ self._WORKING_APP_VERSION = (app_version, manifest_app_version)
+ self.write_debug('Imported app version combo from extractor arguments')
+ elif app_version or manifest_app_version:
+ # A lone argument is ignored (with a warning); the built-in list is tried instead.
+ self.report_warning('Only one of the two required version params are passed as extractor arguments', only_once=True)
+
+ # A remembered or user-supplied pair gets exactly one attempt, with no fallback.
+ if self._WORKING_APP_VERSION:
+ app_version, manifest_app_version = self._WORKING_APP_VERSION
+ real_query = self._build_api_query(query, app_version, manifest_app_version)
+ return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
+
+ for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
+ real_query = self._build_api_query(query, app_version, manifest_app_version)
+ try:
+ # NOTE(review): with fatal=False, whether this call raises at all depends on
+ # _download_json; if it returns instead of raising, the first pair is recorded
+ # as working even though the download failed - confirm.
+ res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
+ self._WORKING_APP_VERSION = (app_version, manifest_app_version)
+ return res
+ except ExtractorError as e:
+ # Only a JSON decode failure at position 0 moves on to the next pair
+ # (presumably how a rejected app version shows up - TODO confirm).
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
+ # Last candidate exhausted: re-raise when fatal, otherwise warn and return None.
+ if count == len(self._APP_VERSIONS):
+ if fatal:
+ raise e
+ else:
+ self.report_warning(str(e.cause or e.msg))
+ return
+ # `count` in the message is the number of the attempt that just failed.
+ self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS)))
+ continue
+ # Any other ExtractorError is propagated unchanged.
+ raise e
def _get_subtitles(self, aweme_detail, aweme_id):
# TODO: Extract text positioning info
return {
'id': aweme_id,
- 'title': aweme_detail['desc'],
+ 'title': aweme_detail.get('desc'),
'description': aweme_detail['desc'],
'view_count': int_or_none(stats_info.get('play_count')),
'like_count': int_or_none(stats_info.get('digg_count')),
'comment_count': int,
},
'expected_warnings': ['Video not available']
+ }, {
+ # Video without title and description
+ 'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
+ 'info_dict': {
+ 'id': '7059698374567611694',
+ 'ext': 'mp4',
+ 'title': 'N/A',
+ 'description': '',
+ 'uploader': 'pokemonlife22',
+ 'creator': 'Pokemon',
+ 'uploader_id': '6820838815978423302',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
+ 'track': 'original sound',
+ 'timestamp': 1643714123,
+ 'duration': 6,
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20220201',
+ 'artist': 'Pokemon',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'expected_warnings': ['Video not available']
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
# If we only call once, we get a 403 when downloading the video.
self._download_webpage(url, video_id)
webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
- next_json = self._search_regex(
- r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<next_data>[^<]+)',
- webpage, 'next data', group='next_data', default=None)
+ next_data = self._search_nextjs_data(webpage, video_id, default='{}')
- if next_json:
- next_data = self._parse_json(next_json, video_id)
+ if next_data:
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict)
else:
'thumbnail': r're:https://.+_1080x1080\.webp'
},
'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'https://www.tiktok.com/@6820838815978423302',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': '6820838815978423302',
+ 'title': '6820838815978423302',
+ 'thumbnail': r're:https://.+_1080x1080\.webp'
+ },
+ 'expected_warnings': ['Retrying']
}, {
'url': 'https://www.tiktok.com/@meme',
'playlist_mincount': 593,
webpage = self._download_webpage(url, user_name, headers={
'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
})
- user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID')
+ user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name
videos = LazyList(self._video_entries_api(webpage, user_id, user_name))
thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0))
'comment_count': int,
}
}]
- _APP_VERSION = '9.6.0'
- _MANIFEST_APP_VERSION = '960'
+ _APP_VERSIONS = [('9.6.0', '960')]
_APP_NAME = 'aweme'
_AID = 1128
_API_HOSTNAME = 'aweme.snssdk.com'
render_data = self._parse_json(
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
- return self._parse_aweme_video_web(
- traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url)
+ return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url)
+
+
+class TikTokVMIE(InfoExtractor):
+ # Extractor for short links on vm.tiktok.com / vt.tiktok.com: it issues a HEAD
+ # request for the short URL and delegates the response's URL to TikTokIE.
+ _VALID_URL = r'https?://(?:vm|vt)\.tiktok\.com/(?P<id>\w+)'
+ IE_NAME = 'vm.tiktok'
+
+ _TESTS = [{
+ 'url': 'https://vm.tiktok.com/ZSe4FqkKd',
+ 'info_dict': {
+ 'id': '7023491746608712966',
+ 'ext': 'mp4',
+ 'title': 'md5:5607564db90271abbbf8294cca77eddd',
+ 'description': 'md5:5607564db90271abbbf8294cca77eddd',
+ 'duration': 11,
+ 'upload_date': '20211026',
+ 'uploader_id': '7007385080558846981',
+ 'creator': 'Memes',
+ 'artist': 'Memes',
+ 'track': 'original sound',
+ 'uploader': 'susmandem',
+ 'timestamp': 1635284105,
+ 'thumbnail': r're:https://.+\.webp.*',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAXcNoOEOxVyBzuII_E--T0MeCrLP0ay1Sm6x_n3dluiWEoWZD0VlQOytwad4W0i0n',
+ }
+ }, {
+ 'url': 'https://vt.tiktok.com/ZSe4FqkKd',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ # HEAD request only (no page body is downloaded), sent with a
+ # 'facebookexternalhit/1.1' User-Agent; the short-link ID matched by _VALID_URL
+ # is passed along with the request. The response's geturl() value (presumably
+ # the post-redirect URL of the full video page - TODO confirm) is handed to TikTokIE.
+ return self.url_result(self._request_webpage(
+ HEADRequest(url), self._match_id(url), headers={'User-Agent': 'facebookexternalhit/1.1'}).geturl(), TikTokIE)