[yt-dlp.git] / yt_dlp / extractor / redgifs.py

import functools
import urllib

from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import (
    ExtractorError,
    int_or_none,
    qualities,
    try_get,
    OnDemandPagedList,
)


class RedGifsBaseInfoExtractor(InfoExtractor):
    """Common RedGifs API access, pagination and metadata parsing.

    Subclasses that call ``_paged_entries`` must define ``_PAGE_SIZE``.
    """

    # Rendition keys looked up in a gif's ``urls`` mapping, each mapped to the
    # height cap applied to that rendition (``None`` means "use the original
    # height"). The key order is also what gets handed to ``qualities()``.
    _FORMATS = {
        'gif': 250,
        'sd': 480,
        'hd': None,
    }

    # Headers sent with every API request. The dict lives on the class, so the
    # ``authorization`` entry added by ``_fetch_oauth_token`` is shared by all
    # extractor instances.
    _API_HEADERS = {
        'referer': 'https://www.redgifs.com/',
        'origin': 'https://www.redgifs.com',
        'content-type': 'application/json',
    }

    def _parse_gif_data(self, gif_data):
        """Convert one API ``gif`` object into an info dict.

        @param gif_data  Mapping describing a single gif as returned by the API.
        @returns         Info dict with metadata and one format per rendition
                         of ``_FORMATS`` that has a URL in ``gif_data['urls']``.
        """
        video_id = gif_data.get('id')
        quality = qualities(tuple(self._FORMATS.keys()))

        orig_height = int_or_none(gif_data.get('height'))
        orig_width = int_or_none(gif_data.get('width'))
        # width / height, so that ``height * aspect_ratio`` yields a width.
        # None when either dimension is missing or zero.
        aspect_ratio = orig_width / orig_height if orig_width and orig_height else None

        # Tolerate a missing or null ``urls`` mapping instead of raising
        urls = gif_data.get('urls') or {}

        formats = []
        for format_id, max_height in self._FORMATS.items():
            video_url = urls.get(format_id)
            if not video_url:
                continue
            if orig_height is None:
                # Without the original height the real rendition height is unknown
                height = None
            else:
                # A rendition is never taller than the original
                height = min(orig_height, max_height or orig_height)
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': round(height * aspect_ratio) if height and aspect_ratio else None,
                'height': height,
                'quality': quality(format_id),
            })

        return {
            'id': video_id,
            'webpage_url': f'https://redgifs.com/watch/{video_id}',
            'extractor_key': RedGifsIE.ie_key(),
            'extractor': 'RedGifs',
            'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
            'timestamp': int_or_none(gif_data.get('createDate')),
            'uploader': gif_data.get('userName'),
            'duration': int_or_none(gif_data.get('duration')),
            'view_count': int_or_none(gif_data.get('views')),
            'like_count': int_or_none(gif_data.get('likes')),
            'categories': gif_data.get('tags') or [],
            'tags': gif_data.get('tags'),
            'age_limit': 18,
            'formats': formats,
        }

    def _fetch_oauth_token(self, video_id):
        """Fetch a temporary API token and store it in ``_API_HEADERS``.

        @param video_id  Identifier passed through to ``_download_json``.
        @raises ExtractorError  If the response carries no ``token``.
        """
        # https://github.com/Redgifs/api/wiki/Temporary-tokens
        auth = self._download_json('https://api.redgifs.com/v2/auth/temporary',
                                   video_id, note='Fetching temporary token')
        if not auth.get('token'):
            raise ExtractorError('Unable to get temporary token')
        self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'

    def _call_api(self, ep, video_id, *args, **kwargs):
        """Request ``https://api.redgifs.com/v2/{ep}`` and return the parsed JSON.

        A token is fetched first when none is cached. If the first request
        fails with HTTP 401, the cached token is dropped and the request is
        retried once with a fresh token.

        @param ep        Endpoint path appended to the v2 API base URL.
        @param video_id  Identifier used for the request and in error messages.
        @param args, kwargs  Forwarded to ``_download_json``.
        @raises ExtractorError  On a request failure that is not retried, or
                                when the response contains an ``error`` key.
        """
        for attempt in range(2):
            if 'authorization' not in self._API_HEADERS:
                self._fetch_oauth_token(video_id)
            try:
                headers = dict(self._API_HEADERS)
                headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
                data = self._download_json(
                    f'https://api.redgifs.com/v2/{ep}', video_id, *args, headers=headers, **kwargs)
                break
            except ExtractorError as e:
                if not attempt and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
                    # Drop the rejected token so the next iteration fetches a new one
                    self._API_HEADERS.pop('authorization', None)
                    continue
                raise

        if 'error' in data:
            raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
        return data

    def _fetch_page(self, ep, video_id, query, page):
        """Yield the parsed entries of one result page.

        @param ep        Endpoint path, see ``_call_api``.
        @param video_id  Identifier passed through to ``_call_api``.
        @param query     Base API query; it is not modified.
        @param page      Page index; the API is asked for ``page + 1``.
        """
        # Build a per-request copy so the dict shared via functools.partial
        # is not mutated between page fetches
        page_query = {**query, 'page': page + 1}
        data = self._call_api(
            ep, video_id, query=page_query, note=f'Downloading JSON metadata page {page + 1}')

        for entry in data['gifs']:
            yield self._parse_gif_data(entry)

    def _prepare_api_query(self, query, fields):
        """Reduce a parsed query string to the fields accepted by the API.

        @param query   Mapping of name -> sequence of values (first value is used).
        @param fields  Mapping of accepted field name -> default value; a field
                       whose resulting value is None is omitted.
        @returns       Dict of field name -> single value.
        """
        api_query = [
            (field_name, query.get(field_name, (default,))[0])
            for field_name, default in fields.items()]

        return {key: val for key, val in api_query if val is not None}

    def _paged_entries(self, ep, item_id, query, fields):
        """Return the entries for a listing endpoint.

        When ``query`` carries a truthy integer ``page``, only that single page
        is fetched; otherwise an ``OnDemandPagedList`` over all pages is returned.

        @param ep       Endpoint path, see ``_call_api``.
        @param item_id  Identifier passed through to ``_call_api``.
        @param query    Parsed query string (name -> sequence of values).
        @param fields   Accepted API fields, see ``_prepare_api_query``.
        """
        page = int_or_none(query.get('page', (None,))[0])
        page_fetcher = functools.partial(
            self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
        # NOTE(review): _fetch_page requests ``page + 1``, so ``?page=2`` asks the
        # API for page 3 - confirm whether the site's ``page`` parameter is 0-based.
        return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)


class RedGifsIE(RedGifsBaseInfoExtractor):
    """Extractor for a single RedGifs item (watch page or thumbs2 media URL)."""

    _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
    _TESTS = [{
        'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
        'info_dict': {
            'id': 'squeakyhelplesswisent',
            'ext': 'mp4',
            'title': 'Hotwife Legs Thick',
            'timestamp': 1636287915,
            'upload_date': '20211107',
            'uploader': 'ignored52',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
            'tags': list,
        },
    }, {
        'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
        'info_dict': {
            'id': 'squeakyhelplesswisent',
            'ext': 'mp4',
            'title': 'Hotwife Legs Thick',
            'timestamp': 1636287915,
            'upload_date': '20211107',
            'uploader': 'ignored52',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
            'tags': list,
        },
    }]

    def _real_extract(self, url):
        """Look up the gif named in ``url`` and return its info dict."""
        # The ID matched from the URL is lower-cased before it is sent to the API
        gif_id = self._match_id(url).lower()
        endpoint = f'gifs/{gif_id}?views=yes'
        response = self._call_api(endpoint, gif_id, note='Downloading video info')
        return self._parse_gif_data(response['gif'])


class RedGifsSearchIE(RedGifsBaseInfoExtractor):
    """Playlist extractor for RedGifs ``/browse?...`` search URLs."""

    IE_DESC = 'Redgifs search'
    _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
    _PAGE_SIZE = 80
    _TESTS = [
        {
            'url': 'https://www.redgifs.com/browse?tags=Lesbian',
            'info_dict': {
                'id': 'tags=Lesbian',
                'title': 'Lesbian',
                'description': 'RedGifs search for Lesbian, ordered by trending',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
            'info_dict': {
                'id': 'type=g&order=latest&tags=Lesbian',
                'title': 'Lesbian',
                'description': 'RedGifs search for Lesbian, ordered by latest',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
            'info_dict': {
                'id': 'type=g&order=latest&tags=Lesbian&page=2',
                'title': 'Lesbian',
                'description': 'RedGifs search for Lesbian, ordered by latest',
            },
            'playlist_count': 80,
        },
    ]

    def _real_extract(self, url):
        """Build a playlist from the search described by the URL's query string.

        The raw query string doubles as the playlist ID and the first ``tags``
        value as its title.

        @raises ExtractorError  If the query has no ``tags`` value.
        """
        raw_query = self._match_valid_url(url).group('query')
        params = compat_parse_qs(raw_query)

        tag_values = params.get('tags')
        if not tag_values:
            raise ExtractorError('Invalid query tags', expected=True)

        search_tags = tag_values[0]
        sort_order = params.get('order', ('trending',))[0]

        # The API takes the tags under the ``search_text`` field
        params['search_text'] = [search_tags]
        api_fields = {
            'search_text': None,
            'order': 'trending',
            'type': None,
        }
        results = self._paged_entries('gifs/search', raw_query, params, api_fields)

        description = f'RedGifs search for {search_tags}, ordered by {sort_order}'
        return self.playlist_result(results, raw_query, search_tags, description)


class RedGifsUserIE(RedGifsBaseInfoExtractor):
    """Playlist extractor for RedGifs ``/users/<name>`` pages."""

    IE_DESC = 'Redgifs user'
    _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
    _PAGE_SIZE = 30
    _TESTS = [
        {
            'url': 'https://www.redgifs.com/users/lamsinka89',
            'info_dict': {
                'id': 'lamsinka89',
                'title': 'lamsinka89',
                'description': 'RedGifs user lamsinka89, ordered by recent',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
            'info_dict': {
                'id': 'lamsinka89?page=3',
                'title': 'lamsinka89',
                'description': 'RedGifs user lamsinka89, ordered by recent',
            },
            'playlist_count': 30,
        },
        {
            'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
            'info_dict': {
                'id': 'lamsinka89?order=best&type=g',
                'title': 'lamsinka89',
                'description': 'RedGifs user lamsinka89, ordered by best',
            },
            'playlist_mincount': 100,
        },
    ]

    def _real_extract(self, url):
        """Build a playlist of the gifs belonging to the user named in ``url``.

        The playlist ID is the username, with ``?<query>`` appended when the
        URL has a query string; the title is the username.
        """
        match = self._match_valid_url(url)
        username = match.group('username')
        query_str = match.group('query')

        if query_str:
            playlist_id = f'{username}?{query_str}'
        else:
            playlist_id = username

        params = compat_parse_qs(query_str)
        sort_order = params.get('order', ('recent',))[0]

        api_fields = {
            'order': 'recent',
            'type': None,
        }
        results = self._paged_entries(
            f'users/{username}/search', playlist_id, params, api_fields)

        description = f'RedGifs user {username}, ordered by {sort_order}'
        return self.playlist_result(results, playlist_id, username, description)
Commit	Line	Data
bf57cfa8	1	import functools
8c188d5d	2	import urllib
4e4ba1d7	3
4e4ba1d7	4	from .common import InfoExtractor
bf57cfa8	5	from ..compat import compat_parse_qs
4e4ba1d7	6	from ..utils import (
	7	ExtractorError,
	8	int_or_none,
	9	qualities,
	10	try_get,
bf57cfa8	11	OnDemandPagedList,
4e4ba1d7	12	)
	13
	14
bf57cfa8	15	class RedGifsBaseInfoExtractor(InfoExtractor):
4e4ba1d7	16	_FORMATS = {
	17	'gif': 250,
	18	'sd': 480,
	19	'hd': None,
	20	}
bf57cfa8	21
c53e5cf5	22	_API_HEADERS = {
	23	'referer': 'https://www.redgifs.com/',
	24	'origin': 'https://www.redgifs.com',
	25	'content-type': 'application/json',
	26	}
	27
bf57cfa8 DS	28	def _parse_gif_data(self, gif_data):
	29	video_id = gif_data.get('id')
	30	quality = qualities(tuple(self._FORMATS.keys()))
	31
	32	orig_height = int_or_none(gif_data.get('height'))
	33	aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])
	34
	35	formats = []
	36	for format_id, height in self._FORMATS.items():
	37	video_url = gif_data['urls'].get(format_id)
	38	if not video_url:
	39	continue
	40	height = min(orig_height, height or orig_height)
	41	formats.append({
	42	'url': video_url,
	43	'format_id': format_id,
	44	'width': height * aspect_ratio if aspect_ratio else None,
	45	'height': height,
	46	'quality': quality(format_id),
	47	})
bf57cfa8 DS	48
	49	return {
	50	'id': video_id,
	51	'webpage_url': f'https://redgifs.com/watch/{video_id}',
c53e5cf5	52	'extractor_key': RedGifsIE.ie_key(),
bf57cfa8 DS	53	'extractor': 'RedGifs',
	54	'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
	55	'timestamp': int_or_none(gif_data.get('createDate')),
	56	'uploader': gif_data.get('userName'),
	57	'duration': int_or_none(gif_data.get('duration')),
	58	'view_count': int_or_none(gif_data.get('views')),
	59	'like_count': int_or_none(gif_data.get('likes')),
	60	'categories': gif_data.get('tags') or [],
	61	'tags': gif_data.get('tags'),
	62	'age_limit': 18,
	63	'formats': formats,
	64	}
	65
c53e5cf5	66	def _fetch_oauth_token(self, video_id):
0c908911	67	# https://github.com/Redgifs/api/wiki/Temporary-tokens
	68	auth = self._download_json('https://api.redgifs.com/v2/auth/temporary',
	69	video_id, note='Fetching temporary token')
	70	if not auth.get('token'):
	71	raise ExtractorError('Unable to get temporary token')
	72	self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'
c53e5cf5	73
bf57cfa8	74	def _call_api(self, ep, video_id, args, *kwargs):
8c188d5d KW	75	for attempt in range(2):
	76	if 'authorization' not in self._API_HEADERS:
	77	self._fetch_oauth_token(video_id)
	78	try:
	79	headers = dict(self._API_HEADERS)
	80	headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
	81	data = self._download_json(
	82	f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, args, *kwargs)
	83	break
	84	except ExtractorError as e:
	85	if not attempt and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
	86	del self._API_HEADERS['authorization'] # refresh the token
	87	raise
c53e5cf5	88
bf57cfa8 DS	89	if 'error' in data:
	90	raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
	91	return data
	92
	93	def _fetch_page(self, ep, video_id, query, page):
	94	query['page'] = page + 1
	95	data = self._call_api(
	96	ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
	97
	98	for entry in data['gifs']:
	99	yield self._parse_gif_data(entry)
	100
	101	def _prepare_api_query(self, query, fields):
	102	api_query = [
	103	(field_name, query.get(field_name, (default,))[0])
	104	for field_name, default in fields.items()]
	105
	106	return {key: val for key, val in api_query if val is not None}
	107
	108	def _paged_entries(self, ep, item_id, query, fields):
	109	page = int_or_none(query.get('page', (None,))[0])
	110	page_fetcher = functools.partial(
	111	self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
	112	return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
	113
	114
	115	class RedGifsIE(RedGifsBaseInfoExtractor):
	116	_VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/\|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
4e4ba1d7	117	_TESTS = [{
	118	'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
	119	'info_dict': {
	120	'id': 'squeakyhelplesswisent',
	121	'ext': 'mp4',
	122	'title': 'Hotwife Legs Thick',
	123	'timestamp': 1636287915,
	124	'upload_date': '20211107',
	125	'uploader': 'ignored52',
	126	'duration': 16,
	127	'view_count': int,
	128	'like_count': int,
	129	'categories': list,
	130	'age_limit': 18,
c53e5cf5	131	'tags': list,
4e4ba1d7	132	}
	133	}, {
	134	'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
	135	'info_dict': {
	136	'id': 'squeakyhelplesswisent',
	137	'ext': 'mp4',
	138	'title': 'Hotwife Legs Thick',
	139	'timestamp': 1636287915,
	140	'upload_date': '20211107',
	141	'uploader': 'ignored52',
	142	'duration': 16,
	143	'view_count': int,
	144	'like_count': int,
	145	'categories': list,
	146	'age_limit': 18,
c53e5cf5	147	'tags': list,
4e4ba1d7	148	}
	149	}]
	150
	151	def _real_extract(self, url):
	152	video_id = self._match_id(url).lower()
bf57cfa8	153	video_info = self._call_api(
c53e5cf5	154	f'gifs/{video_id}?views=yes', video_id, note='Downloading video info')
bf57cfa8	155	return self._parse_gif_data(video_info['gif'])
4e4ba1d7	156
4e4ba1d7	157
bf57cfa8 DS	158	class RedGifsSearchIE(RedGifsBaseInfoExtractor):
	159	IE_DESC = 'Redgifs search'
	160	_VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
	161	_PAGE_SIZE = 80
	162	_TESTS = [
	163	{
	164	'url': 'https://www.redgifs.com/browse?tags=Lesbian',
	165	'info_dict': {
	166	'id': 'tags=Lesbian',
	167	'title': 'Lesbian',
	168	'description': 'RedGifs search for Lesbian, ordered by trending'
	169	},
	170	'playlist_mincount': 100,
	171	},
	172	{
	173	'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
	174	'info_dict': {
	175	'id': 'type=g&order=latest&tags=Lesbian',
	176	'title': 'Lesbian',
	177	'description': 'RedGifs search for Lesbian, ordered by latest'
	178	},
	179	'playlist_mincount': 100,
	180	},
	181	{
	182	'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
	183	'info_dict': {
	184	'id': 'type=g&order=latest&tags=Lesbian&page=2',
	185	'title': 'Lesbian',
	186	'description': 'RedGifs search for Lesbian, ordered by latest'
	187	},
	188	'playlist_count': 80,
	189	}
	190	]
4e4ba1d7	191
bf57cfa8 DS	192	def _real_extract(self, url):
	193	query_str = self._match_valid_url(url).group('query')
	194	query = compat_parse_qs(query_str)
	195	if not query.get('tags'):
	196	raise ExtractorError('Invalid query tags', expected=True)
4e4ba1d7	197
bf57cfa8 DS	198	tags = query.get('tags')[0]
bf57cfa8 DS	199	order = query.get('order', ('trending',))[0]
4e4ba1d7	200
bf57cfa8 DS	201	query['search_text'] = [tags]
	202	entries = self._paged_entries('gifs/search', query_str, query, {
	203	'search_text': None,
	204	'order': 'trending',
	205	'type': None,
	206	})
4e4ba1d7	207
bf57cfa8 DS	208	return self.playlist_result(
	209	entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}')
	210
	211
	212	class RedGifsUserIE(RedGifsBaseInfoExtractor):
	213	IE_DESC = 'Redgifs user'
	214	_VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
	215	_PAGE_SIZE = 30
	216	_TESTS = [
	217	{
	218	'url': 'https://www.redgifs.com/users/lamsinka89',
	219	'info_dict': {
	220	'id': 'lamsinka89',
	221	'title': 'lamsinka89',
	222	'description': 'RedGifs user lamsinka89, ordered by recent'
	223	},
	224	'playlist_mincount': 100,
	225	},
	226	{
	227	'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
	228	'info_dict': {
	229	'id': 'lamsinka89?page=3',
	230	'title': 'lamsinka89',
	231	'description': 'RedGifs user lamsinka89, ordered by recent'
	232	},
	233	'playlist_count': 30,
	234	},
	235	{
	236	'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
	237	'info_dict': {
	238	'id': 'lamsinka89?order=best&type=g',
	239	'title': 'lamsinka89',
	240	'description': 'RedGifs user lamsinka89, ordered by best'
	241	},
	242	'playlist_mincount': 100,
4e4ba1d7	243	}
bf57cfa8 DS	244	]
	245
	246	def _real_extract(self, url):
	247	username, query_str = self._match_valid_url(url).group('username', 'query')
	248	playlist_id = f'{username}?{query_str}' if query_str else username
	249
	250	query = compat_parse_qs(query_str)
	251	order = query.get('order', ('recent',))[0]
	252
	253	entries = self._paged_entries(f'users/{username}/search', playlist_id, query, {
	254	'order': 'recent',
	255	'type': None,
	256	})
	257
	258	return self.playlist_result(
	259	entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')