[yt-dlp.git] / yt_dlp / extractor / triller.py

import itertools
import json

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    str_or_none,
    traverse_obj,
    unified_strdate,
    unified_timestamp,
    url_basename,
)


class TrillerBaseIE(InfoExtractor):
    """Shared login, comment and metadata-parsing helpers for the Triller extractors."""

    _NETRC_MACHINE = 'triller'
    _API_BASE_URL = 'https://social.triller.co/v1.5'
    # NOTE: class-level dict that is mutated in place when an auth token is
    # obtained, so the 'Authorization' entry is seen by every class that
    # inherits this attribute.
    _API_HEADERS = {'Origin': 'https://triller.co'}

    def _perform_login(self, username, password):
        """Log in with the given credentials and store the bearer token.

        Does nothing when an 'Authorization' header is already set.

        Raises:
            ExtractorError: if the username check reports the name as invalid,
                the API answers with error 1008 (reported as an incorrect
                password), or no auth token is returned for any other reason
                (including a failed request).
        """
        if self._API_HEADERS.get('Authorization'):
            return

        headers = {**self._API_HEADERS, 'Content-Type': 'application/json'}

        # Both requests are non-fatal, so a failed download yields a falsy
        # value instead of a dict; fall back to {} before calling .get().
        user_check = self._download_json(
            f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
            fatal=False, expected_status=400, headers=headers,
            data=json.dumps({'username': username}, separators=(',', ':')).encode('utf-8')) or {}
        if user_check.get('status'):  # endpoint returns "status":false if username exists
            raise ExtractorError('Unable to login: Invalid username', expected=True)

        credentials = {
            'username': username,
            'password': password,
        }
        login = self._download_json(
            f'{self._API_BASE_URL}/user/auth', None, note='Logging in',
            fatal=False, expected_status=400, headers=headers,
            data=json.dumps(credentials, separators=(',', ':')).encode('utf-8')) or {}
        if not login.get('auth_token'):
            if login.get('error') == 1008:
                raise ExtractorError('Unable to login: Incorrect password', expected=True)
            raise ExtractorError('Unable to login')

        self._API_HEADERS['Authorization'] = f'Bearer {login["auth_token"]}'

    def _get_comments(self, video_id, limit=15):
        """Yield one comment dict per entry returned by the comments_v2 endpoint.

        The request is non-fatal: when it fails or returns no comments,
        nothing is yielded. `limit` is forwarded as the `limit` query parameter.
        """
        comment_info = self._download_json(
            f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2',
            video_id, fatal=False, note='Downloading comments API JSON',
            headers=self._API_HEADERS, query={'limit': limit}) or {}
        for comment_dict in comment_info.get('comments') or []:
            yield {
                'author': traverse_obj(comment_dict, ('author', 'username')),
                'author_id': traverse_obj(comment_dict, ('author', 'user_id')),
                'id': comment_dict.get('id'),
                'text': comment_dict.get('body'),
                'timestamp': unified_timestamp(comment_dict.get('timestamp')),
            }

    def _check_user_info(self, user_info):
        """Validate `user_info` and return it unchanged.

        Emits a warning when `user_info` is empty.

        Raises:
            ExtractorError: if the user is private and not followed by the
                current account, or if either blocked flag is set.
        """
        if not user_info:
            self.report_warning('Unable to extract user info')
        elif user_info.get('private') and not user_info.get('followed_by_me'):
            raise ExtractorError('This video is private', expected=True)
        elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
            raise ExtractorError('The author of the video is blocked', expected=True)
        return user_info

    def _parse_video_info(self, video_info, username, user_info=None):
        """Build an info dict from one video record of the API.

        Args:
            video_info: the video record (a dict) as returned by the API.
            username: uploader name used for the title fallback and the URLs.
            user_info: optional user record; when falsy, `video_info['user']`
                is used instead.

        Returns:
            The info dict, including all formats that could be collected.
        """
        video_uuid = video_info.get('video_uuid')
        video_id = video_info.get('id')

        formats = []
        video_url = traverse_obj(video_info, 'video_url', 'stream_url')
        if video_url:
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'vcodec': 'h264',
                'width': int_or_none(video_info.get('width')),
                'height': int_or_none(video_info.get('height')),
                'format_id': url_basename(video_url).split('.')[0],
                'filesize': int_or_none(video_info.get('filesize')),
            })

        for video in video_info.get('video_set') or []:
            set_url = video.get('url')
            if not set_url:
                # An entry without a URL cannot be downloaded; skip it
                # instead of aborting the whole extraction.
                continue
            # 'resolution' looks like '<width>x<height>'; tolerate a missing
            # or malformed value instead of raising IndexError.
            dimensions = (video.get('resolution') or '').split('x')
            width = dimensions[0]
            height = dimensions[1] if len(dimensions) > 1 else None
            formats.append({
                'url': set_url,
                'ext': 'mp4',
                'vcodec': video.get('codec'),
                'vbr': int_or_none(video.get('bitrate'), 1000),
                'width': int_or_none(width),
                'height': int_or_none(height),
                'format_id': url_basename(set_url).split('.')[0],
            })

        audio_url = video_info.get('audio_url')
        if audio_url:
            formats.append({
                'url': audio_url,
                'ext': 'm4a',
                'format_id': url_basename(audio_url).split('.')[0],
            })

        manifest_url = video_info.get('transcoded_url')
        if manifest_url:
            formats.extend(self._extract_m3u8_formats(
                manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        comment_count = int_or_none(video_info.get('comment_count'))

        user_info = user_info or traverse_obj(video_info, 'user', default={})

        return {
            'id': str_or_none(video_id) or video_uuid,
            'title': video_info.get('description') or f'Video by {username}',
            'thumbnail': video_info.get('thumbnail_url'),
            'description': video_info.get('description'),
            'uploader': str_or_none(username),
            'uploader_id': str_or_none(user_info.get('user_id')),
            'creator': str_or_none(user_info.get('name')),
            'timestamp': unified_timestamp(video_info.get('timestamp')),
            'upload_date': unified_strdate(video_info.get('timestamp')),
            'duration': int_or_none(video_info.get('duration')),
            'view_count': int_or_none(video_info.get('play_count')),
            'like_count': int_or_none(video_info.get('likes_count')),
            'artist': str_or_none(video_info.get('song_artist')),
            'track': str_or_none(video_info.get('song_title')),
            'webpage_url': f'https://triller.co/@{username}/video/{video_uuid}',
            'uploader_url': f'https://triller.co/@{username}',
            # Always attribute entries to TrillerIE, whichever subclass
            # produced them.
            'extractor_key': TrillerIE.ie_key(),
            'extractor': TrillerIE.IE_NAME,
            'formats': formats,
            'comment_count': comment_count,
            '__post_extractor': self.extract_comments(video_id, comment_count),
        }


class TrillerIE(TrillerBaseIE):
    # Extractor for single-video URLs: https://triller.co/@<username>/video/<uuid>
    _VALID_URL = r'''(?x)
            https?://(?:www\.)?triller\.co/
            @(?P<username>[\w\._]+)/video/
            (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
        '''
    _TESTS = [{
        'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
        'md5': '228662d783923b60d78395fedddc0a20',
        'info_dict': {
            'id': '71595734',
            'ext': 'mp4',
            'title': 'md5:9a2bf9435c5c4292678996a464669416',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:9a2bf9435c5c4292678996a464669416',
            'uploader': 'theestallion',
            'uploader_id': '18992236',
            'creator': 'Megan Thee Stallion',
            'timestamp': 1660598222,
            'upload_date': '20220815',
            'duration': 47,
            'height': 3840,
            'width': 2160,
            'view_count': int,
            'like_count': int,
            'artist': 'Megan Thee Stallion',
            'track': 'Her',
            'webpage_url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
            'uploader_url': 'https://triller.co/@theestallion',
            'comment_count': int,
        }
    }, {
        'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
        'md5': '874055f462af5b0699b9dbb527a505a0',
        'info_dict': {
            'id': '71621339',
            'ext': 'mp4',
            'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
            'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
            'uploader': 'charlidamelio',
            'uploader_id': '1875551',
            'creator': 'charli damelio',
            'timestamp': 1660773354,
            'upload_date': '20220817',
            'duration': 16,
            'height': 1920,
            'width': 1080,
            'view_count': int,
            'like_count': int,
            'artist': 'Dixie',
            'track': 'Someone to Blame',
            'webpage_url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
            'uploader_url': 'https://triller.co/@charlidamelio',
            'comment_count': int,
        }
    }]

    def _real_extract(self, url):
        # Pull the uploader name and the video UUID out of the matched URL.
        mobj = self._match_valid_url(url)
        username = mobj.group('username')
        video_uuid = mobj.group('id')

        api_response = self._download_json(
            f'{self._API_BASE_URL}/api/videos/{video_uuid}', video_uuid,
            note='Downloading video info API JSON',
            errnote='Unable to download video info API JSON',
            headers=self._API_HEADERS)

        # The record of interest is the first element of the 'videos' list.
        video_info = traverse_obj(api_response, ('videos', 0))
        if not video_info:
            raise ExtractorError('No video info found in API response')

        # Check the uploader record (private/blocked) before building the result.
        uploader_info = self._check_user_info(video_info.get('user') or {})
        return self._parse_video_info(video_info, username, uploader_info)


class TrillerUserIE(TrillerBaseIE):
    """Extractor for user pages (https://triller.co/@<username>), returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w\._]+)/?(?:$|[#?])'
    _TESTS = [{
        # first videos request only returns 2 videos
        'url': 'https://triller.co/@theestallion',
        'playlist_mincount': 9,
        'info_dict': {
            'id': '18992236',
            'title': 'theestallion',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
        }
    }, {
        'url': 'https://triller.co/@charlidamelio',
        'playlist_mincount': 25,
        'info_dict': {
            'id': '1875551',
            'title': 'charlidamelio',
            'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
        }
    }]

    def _real_initialize(self):
        """Create a guest session when no 'Authorization' header is set yet.

        Raises:
            ExtractorError: if the guest endpoint returns no auth token.
        """
        if not self._API_HEADERS.get('Authorization'):
            guest = self._download_json(
                f'{self._API_BASE_URL}/user/create_guest',
                None, note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
                    'platform': 'Web',
                    'app_version': '',
                })
            if not guest.get('auth_token'):
                raise ExtractorError('Unable to fetch required auth token for user extraction')

            self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'

    def _extract_video_list(self, username, user_id, limit=6):
        """Yield the raw video records of a user, page by page.

        Each following page is requested with `before_time` set to the
        timestamp of the last video of the previous page. Iteration stops on
        an empty page or when that timestamp is missing.
        """
        query = {
            'limit': limit,
        }
        for page in itertools.count(1):
            for retry in self.RetryManager():
                try:
                    video_list = self._download_json(
                        f'{self._API_BASE_URL}/api/users/{user_id}/videos',
                        username, note=f'Downloading user video list page {page}',
                        errnote='Unable to download user video list', headers=self._API_HEADERS,
                        query=query)
                except ExtractorError as e:
                    # Only a JSON decode error at position 0 is retried;
                    # every other failure is re-raised.
                    if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
                        retry.error = e
                        continue
                    raise
            if not video_list.get('videos'):
                break
            yield from video_list['videos']
            query['before_time'] = traverse_obj(video_list, ('videos', -1, 'timestamp'))
            if not query['before_time']:
                break

    def _entries(self, videos, username, user_info):
        """Turn each raw video record into an info dict."""
        for video in videos:
            yield self._parse_video_info(video, username, user_info)

    def _real_extract(self, url):
        """Return a playlist of all videos of the user named in `url`.

        Raises:
            ExtractorError: if the user is private/blocked (see
                `_check_user_info`) or no user ID can be extracted.
        """
        username = self._match_id(url)
        # `or {}` also covers an explicit `"user": null` in the response,
        # which `.get('user', {})` would pass through as None.
        user_info = self._check_user_info(self._download_json(
            f'{self._API_BASE_URL}/api/users/by_username/{username}',
            username, note='Downloading user info',
            errnote='Failed to download user info', headers=self._API_HEADERS).get('user') or {})

        user_id = str_or_none(user_info.get('user_id'))
        if not user_id:
            # Without an ID the video list URL would be built with 'None'.
            raise ExtractorError('Unable to extract user ID')

        videos = self._extract_video_list(username, user_id)
        thumbnail = user_info.get('avatar_url')

        return self.playlist_result(
            self._entries(videos, username, user_info), user_id, username, thumbnail=thumbnail)
Commit	Line	Data
92aa6d68	1	import itertools
	2	import json
	3
	4	from .common import InfoExtractor
	5	from ..utils import (
d2c8aadf	6	ExtractorError,
92aa6d68	7	int_or_none,
	8	str_or_none,
	9	traverse_obj,
	10	unified_strdate,
	11	unified_timestamp,
	12	url_basename,
92aa6d68	13	)
	14
	15
	16	class TrillerBaseIE(InfoExtractor):
	17	_NETRC_MACHINE = 'triller'
92aa6d68	18	_API_BASE_URL = 'https://social.triller.co/v1.5'
d6f88719	19	_API_HEADERS = {'Origin': 'https://triller.co'}
92aa6d68	20
92aa6d68	21	def _perform_login(self, username, password):
d6f88719	22	if self._API_HEADERS.get('Authorization'):
92aa6d68	23	return
	24
	25	user_check = self._download_json(
	26	f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
	27	fatal=False, expected_status=400, headers={
	28	'Content-Type': 'application/json',
	29	'Origin': 'https://triller.co',
	30	}, data=json.dumps({'username': username}, separators=(',', ':')).encode('utf-8'))
	31	if user_check.get('status'): # endpoint returns "status":false if username exists
	32	raise ExtractorError('Unable to login: Invalid username', expected=True)
	33
	34	credentials = {
	35	'username': username,
	36	'password': password,
	37	}
	38	login = self._download_json(
	39	f'{self._API_BASE_URL}/user/auth', None, note='Logging in',
	40	fatal=False, expected_status=400, headers={
	41	'Content-Type': 'application/json',
	42	'Origin': 'https://triller.co',
	43	}, data=json.dumps(credentials, separators=(',', ':')).encode('utf-8'))
	44	if not login.get('auth_token'):
	45	if login.get('error') == 1008:
	46	raise ExtractorError('Unable to login: Incorrect password', expected=True)
	47	raise ExtractorError('Unable to login')
	48
d6f88719	49	self._API_HEADERS['Authorization'] = f'Bearer {login["auth_token"]}'
92aa6d68	50
	51	def _get_comments(self, video_id, limit=15):
	52	comment_info = self._download_json(
	53	f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2',
	54	video_id, fatal=False, note='Downloading comments API JSON',
d6f88719	55	headers=self._API_HEADERS, query={'limit': limit}) or {}
92aa6d68	56	if not comment_info.get('comments'):
	57	return
	58	for comment_dict in comment_info['comments']:
	59	yield {
	60	'author': traverse_obj(comment_dict, ('author', 'username')),
	61	'author_id': traverse_obj(comment_dict, ('author', 'user_id')),
	62	'id': comment_dict.get('id'),
	63	'text': comment_dict.get('body'),
	64	'timestamp': unified_timestamp(comment_dict.get('timestamp')),
	65	}
	66
	67	def _check_user_info(self, user_info):
	68	if not user_info:
	69	self.report_warning('Unable to extract user info')
	70	elif user_info.get('private') and not user_info.get('followed_by_me'):
	71	raise ExtractorError('This video is private', expected=True)
	72	elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
	73	raise ExtractorError('The author of the video is blocked', expected=True)
	74	return user_info
	75
	76	def _parse_video_info(self, video_info, username, user_info=None):
	77	video_uuid = video_info.get('video_uuid')
	78	video_id = video_info.get('id')
	79
	80	formats = []
	81	video_url = traverse_obj(video_info, 'video_url', 'stream_url')
	82	if video_url:
	83	formats.append({
	84	'url': video_url,
	85	'ext': 'mp4',
	86	'vcodec': 'h264',
	87	'width': video_info.get('width'),
	88	'height': video_info.get('height'),
	89	'format_id': url_basename(video_url).split('.')[0],
	90	'filesize': video_info.get('filesize'),
	91	})
	92	video_set = video_info.get('video_set') or []
	93	for video in video_set:
	94	resolution = video.get('resolution') or ''
	95	formats.append({
	96	'url': video['url'],
	97	'ext': 'mp4',
	98	'vcodec': video.get('codec'),
	99	'vbr': int_or_none(video.get('bitrate'), 1000),
	100	'width': int_or_none(resolution.split('x')[0]),
	101	'height': int_or_none(resolution.split('x')[1]),
	102	'format_id': url_basename(video['url']).split('.')[0],
	103	})
	104	audio_url = video_info.get('audio_url')
	105	if audio_url:
	106	formats.append({
	107	'url': audio_url,
	108	'ext': 'm4a',
	109	'format_id': url_basename(audio_url).split('.')[0],
	110	})
	111
	112	manifest_url = video_info.get('transcoded_url')
	113	if manifest_url:
	114	formats.extend(self._extract_m3u8_formats(
	115	manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
	116	m3u8_id='hls', fatal=False))
92aa6d68	117
	118	comment_count = int_or_none(video_info.get('comment_count'))
	119
	120	user_info = user_info or traverse_obj(video_info, 'user', default={})
	121
	122	return {
	123	'id': str_or_none(video_id) or video_uuid,
	124	'title': video_info.get('description') or f'Video by {username}',
	125	'thumbnail': video_info.get('thumbnail_url'),
	126	'description': video_info.get('description'),
	127	'uploader': str_or_none(username),
	128	'uploader_id': str_or_none(user_info.get('user_id')),
	129	'creator': str_or_none(user_info.get('name')),
	130	'timestamp': unified_timestamp(video_info.get('timestamp')),
	131	'upload_date': unified_strdate(video_info.get('timestamp')),
	132	'duration': int_or_none(video_info.get('duration')),
	133	'view_count': int_or_none(video_info.get('play_count')),
	134	'like_count': int_or_none(video_info.get('likes_count')),
	135	'artist': str_or_none(video_info.get('song_artist')),
	136	'track': str_or_none(video_info.get('song_title')),
	137	'webpage_url': f'https://triller.co/@{username}/video/{video_uuid}',
	138	'uploader_url': f'https://triller.co/@{username}',
	139	'extractor_key': TrillerIE.ie_key(),
	140	'extractor': TrillerIE.IE_NAME,
	141	'formats': formats,
	142	'comment_count': comment_count,
	143	'__post_extractor': self.extract_comments(video_id, comment_count),
	144	}
	145
	146
	147	class TrillerIE(TrillerBaseIE):
	148	_VALID_URL = r'''(?x)
	149	https?://(?:www\.)?triller\.co/
	150	@(?P<username>[\w\._]+)/video/
	151	(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
	152	'''
	153	_TESTS = [{
	154	'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
	155	'md5': '228662d783923b60d78395fedddc0a20',
	156	'info_dict': {
	157	'id': '71595734',
	158	'ext': 'mp4',
	159	'title': 'md5:9a2bf9435c5c4292678996a464669416',
	160	'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
	161	'description': 'md5:9a2bf9435c5c4292678996a464669416',
	162	'uploader': 'theestallion',
	163	'uploader_id': '18992236',
	164	'creator': 'Megan Thee Stallion',
	165	'timestamp': 1660598222,
	166	'upload_date': '20220815',
	167	'duration': 47,
	168	'height': 3840,
	169	'width': 2160,
	170	'view_count': int,
	171	'like_count': int,
	172	'artist': 'Megan Thee Stallion',
	173	'track': 'Her',
	174	'webpage_url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
	175	'uploader_url': 'https://triller.co/@theestallion',
	176	'comment_count': int,
	177	}
	178	}, {
	179	'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
	180	'md5': '874055f462af5b0699b9dbb527a505a0',
181	'info_dict': {
182	'id': '71621339',
183	'ext': 'mp4',
184	'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
185	'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
186	'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
187	'uploader': 'charlidamelio',
188	'uploader_id': '1875551',
189	'creator': 'charli damelio',
190	'timestamp': 1660773354,
191	'upload_date': '20220817',
192	'duration': 16,
193	'height': 1920,
194	'width': 1080,
195	'view_count': int,
196	'like_count': int,
197	'artist': 'Dixie',
198	'track': 'Someone to Blame',
199	'webpage_url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
200	'uploader_url': 'https://triller.co/@charlidamelio',
201	'comment_count': int,
202	}
203	}]
204
205	def _real_extract(self, url):
206	username, video_uuid = self._match_valid_url(url).group('username', 'id')
207
208	video_info = traverse_obj(self._download_json(
209	f'{self._API_BASE_URL}/api/videos/{video_uuid}',
210	video_uuid, note='Downloading video info API JSON',
211	errnote='Unable to download video info API JSON',
d6f88719	212	headers=self._API_HEADERS), ('videos', 0))
92aa6d68	213	if not video_info:
	214	raise ExtractorError('No video info found in API response')
	215
	216	user_info = self._check_user_info(video_info.get('user') or {})
	217	return self._parse_video_info(video_info, username, user_info)
	218
	219
	220	class TrillerUserIE(TrillerBaseIE):
	221	_VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w\._]+)/?(?:$|[#?])'
	222	_TESTS = [{
	223	# first videos request only returns 2 videos
	224	'url': 'https://triller.co/@theestallion',
	225	'playlist_mincount': 9,
	226	'info_dict': {
	227	'id': '18992236',
	228	'title': 'theestallion',
	229	'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
	230	}
	231	}, {
	232	'url': 'https://triller.co/@charlidamelio',
	233	'playlist_mincount': 25,
	234	'info_dict': {
	235	'id': '1875551',
	236	'title': 'charlidamelio',
	237	'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
	238	}
	239	}]
	240
	241	def _real_initialize(self):
d6f88719	242	if not self._API_HEADERS.get('Authorization'):
92aa6d68	243	guest = self._download_json(
92aa6d68	244	f'{self._API_BASE_URL}/user/create_guest',
d6f88719	245	None, note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
92aa6d68	246	'platform': 'Web',
	247	'app_version': '',
	248	})
	249	if not guest.get('auth_token'):
	250	raise ExtractorError('Unable to fetch required auth token for user extraction')
	251
d6f88719	252	self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'
92aa6d68	253
	254	def _extract_video_list(self, username, user_id, limit=6):
	255	query = {
	256	'limit': limit,
	257	}
	258	for page in itertools.count(1):
	259	for retry in self.RetryManager():
	260	try:
	261	video_list = self._download_json(
	262	f'{self._API_BASE_URL}/api/users/{user_id}/videos',
	263	username, note=f'Downloading user video list page {page}',
d6f88719	264	errnote='Unable to download user video list', headers=self._API_HEADERS,
d6f88719	265	query=query)
92aa6d68	266	except ExtractorError as e:
	267	if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
	268	retry.error = e
	269	continue
	270	raise
	271	if not video_list.get('videos'):
	272	break
	273	yield from video_list['videos']
	274	query['before_time'] = traverse_obj(video_list, ('videos', -1, 'timestamp'))
	275	if not query['before_time']:
	276	break
	277
	278	def _entries(self, videos, username, user_info):
	279	for video in videos:
	280	yield self._parse_video_info(video, username, user_info)
	281
	282	def _real_extract(self, url):
	283	username = self._match_id(url)
	284	user_info = self._check_user_info(self._download_json(
	285	f'{self._API_BASE_URL}/api/users/by_username/{username}',
	286	username, note='Downloading user info',
d6f88719	287	errnote='Failed to download user info', headers=self._API_HEADERS).get('user', {}))
92aa6d68	288
	289	user_id = str_or_none(user_info.get('user_id'))
	290	videos = self._extract_video_list(username, user_id)
	291	thumbnail = user_info.get('avatar_url')
	292
	293	return self.playlist_result(
	294	self._entries(videos, username, user_info), user_id, username, thumbnail=thumbnail)