yt_dlp/extractor/redgifs.py

   1 import functools
   2
   3 from .common import InfoExtractor
   4 from ..compat import compat_parse_qs
   5 from ..utils import (
   6     ExtractorError,
   7     int_or_none,
   8     qualities,
   9     try_get,
  10     OnDemandPagedList,
  11 )
  12
  13
  14 class RedGifsBaseInfoExtractor(InfoExtractor):
  15     _FORMATS = {
  16         'gif': 250,
  17         'sd': 480,
  18         'hd': None,
  19     }
  20
  21     _API_HEADERS = {
  22         'referer': 'https://www.redgifs.com/',
  23         'origin': 'https://www.redgifs.com',
  24         'content-type': 'application/json',
  25     }
  26
  27     def _parse_gif_data(self, gif_data):
  28         video_id = gif_data.get('id')
  29         quality = qualities(tuple(self._FORMATS.keys()))
  30
  31         orig_height = int_or_none(gif_data.get('height'))
  32         aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])
  33
  34         formats = []
  35         for format_id, height in self._FORMATS.items():
  36             video_url = gif_data['urls'].get(format_id)
  37             if not video_url:
  38                 continue
  39             height = min(orig_height, height or orig_height)
  40             formats.append({
  41                 'url': video_url,
  42                 'format_id': format_id,
  43                 'width': height * aspect_ratio if aspect_ratio else None,
  44                 'height': height,
  45                 'quality': quality(format_id),
  46             })
  47         self._sort_formats(formats)
  48
  49         return {
  50             'id': video_id,
  51             'webpage_url': f'https://redgifs.com/watch/{video_id}',
  52             'extractor_key': RedGifsIE.ie_key(),
  53             'extractor': 'RedGifs',
  54             'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
  55             'timestamp': int_or_none(gif_data.get('createDate')),
  56             'uploader': gif_data.get('userName'),
  57             'duration': int_or_none(gif_data.get('duration')),
  58             'view_count': int_or_none(gif_data.get('views')),
  59             'like_count': int_or_none(gif_data.get('likes')),
  60             'categories': gif_data.get('tags') or [],
  61             'tags': gif_data.get('tags'),
  62             'age_limit': 18,
  63             'formats': formats,
  64         }
  65
  66     def _fetch_oauth_token(self, video_id):
  67         # These pages contain the OAuth token that is necessary to make API calls.
  68         index_page = self._download_webpage(f'https://www.redgifs.com/watch/{video_id}', video_id)
  69         index_js_uri = self._html_search_regex(
  70             r'href="?(/assets/js/index[.a-z0-9]*.js)"?\W', index_page, 'index_js_uri')
  71         index_js = self._download_webpage(f'https://www.redgifs.com/{index_js_uri}', video_id)
  72         # It turns out that a { followed by any valid JSON punctuation will always result in the
  73         # first two characters of the base64 encoding being "ey".
  74         # Use this fact to find any such string constant of a reasonable length with the correct
  75         # punctuation for an oauth token
  76         oauth_token = self._html_search_regex(
  77             r'\w+\s*[=:]\s*"(ey[^"]+\.[^"]*\.[^"]{43,45})"', index_js, 'oauth token')
  78         self._API_HEADERS['authorization'] = f'Bearer {oauth_token}'
  79
  80     def _call_api(self, ep, video_id, *args, **kwargs):
  81         if 'authorization' not in self._API_HEADERS:
  82             self._fetch_oauth_token(video_id)
  83         assert 'authorization' in self._API_HEADERS
  84
  85         headers = dict(self._API_HEADERS)
  86         headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
  87         data = self._download_json(
  88             f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs)
  89         if 'error' in data:
  90             raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
  91         return data
  92
  93     def _fetch_page(self, ep, video_id, query, page):
  94         query['page'] = page + 1
  95         data = self._call_api(
  96             ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
  97
  98         for entry in data['gifs']:
  99             yield self._parse_gif_data(entry)
 100
 101     def _prepare_api_query(self, query, fields):
 102         api_query = [
 103             (field_name, query.get(field_name, (default,))[0])
 104             for field_name, default in fields.items()]
 105
 106         return {key: val for key, val in api_query if val is not None}
 107
 108     def _paged_entries(self, ep, item_id, query, fields):
 109         page = int_or_none(query.get('page', (None,))[0])
 110         page_fetcher = functools.partial(
 111             self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
 112         return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
 113
 114
 115 class RedGifsIE(RedGifsBaseInfoExtractor):
 116     _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
 117     _TESTS = [{
 118         'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
 119         'info_dict': {
 120             'id': 'squeakyhelplesswisent',
 121             'ext': 'mp4',
 122             'title': 'Hotwife Legs Thick',
 123             'timestamp': 1636287915,
 124             'upload_date': '20211107',
 125             'uploader': 'ignored52',
 126             'duration': 16,
 127             'view_count': int,
 128             'like_count': int,
 129             'categories': list,
 130             'age_limit': 18,
 131             'tags': list,
 132         }
 133     }, {
 134         'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
 135         'info_dict': {
 136             'id': 'squeakyhelplesswisent',
 137             'ext': 'mp4',
 138             'title': 'Hotwife Legs Thick',
 139             'timestamp': 1636287915,
 140             'upload_date': '20211107',
 141             'uploader': 'ignored52',
 142             'duration': 16,
 143             'view_count': int,
 144             'like_count': int,
 145             'categories': list,
 146             'age_limit': 18,
 147             'tags': list,
 148         }
 149     }]
 150
 151     def _real_extract(self, url):
 152         video_id = self._match_id(url).lower()
 153         video_info = self._call_api(
 154             f'gifs/{video_id}?views=yes', video_id, note='Downloading video info')
 155         return self._parse_gif_data(video_info['gif'])
 156
 157
 158 class RedGifsSearchIE(RedGifsBaseInfoExtractor):
 159     IE_DESC = 'Redgifs search'
 160     _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
 161     _PAGE_SIZE = 80
 162     _TESTS = [
 163         {
 164             'url': 'https://www.redgifs.com/browse?tags=Lesbian',
 165             'info_dict': {
 166                 'id': 'tags=Lesbian',
 167                 'title': 'Lesbian',
 168                 'description': 'RedGifs search for Lesbian, ordered by trending'
 169             },
 170             'playlist_mincount': 100,
 171         },
 172         {
 173             'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
 174             'info_dict': {
 175                 'id': 'type=g&order=latest&tags=Lesbian',
 176                 'title': 'Lesbian',
 177                 'description': 'RedGifs search for Lesbian, ordered by latest'
 178             },
 179             'playlist_mincount': 100,
 180         },
 181         {
 182             'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
 183             'info_dict': {
 184                 'id': 'type=g&order=latest&tags=Lesbian&page=2',
 185                 'title': 'Lesbian',
 186                 'description': 'RedGifs search for Lesbian, ordered by latest'
 187             },
 188             'playlist_count': 80,
 189         }
 190     ]
 191
 192     def _real_extract(self, url):
 193         query_str = self._match_valid_url(url).group('query')
 194         query = compat_parse_qs(query_str)
 195         if not query.get('tags'):
 196             raise ExtractorError('Invalid query tags', expected=True)
 197
 198         tags = query.get('tags')[0]
 199         order = query.get('order', ('trending',))[0]
 200
 201         query['search_text'] = [tags]
 202         entries = self._paged_entries('gifs/search', query_str, query, {
 203             'search_text': None,
 204             'order': 'trending',
 205             'type': None,
 206         })
 207
 208         return self.playlist_result(
 209             entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}')
 210
 211
 212 class RedGifsUserIE(RedGifsBaseInfoExtractor):
 213     IE_DESC = 'Redgifs user'
 214     _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
 215     _PAGE_SIZE = 30
 216     _TESTS = [
 217         {
 218             'url': 'https://www.redgifs.com/users/lamsinka89',
 219             'info_dict': {
 220                 'id': 'lamsinka89',
 221                 'title': 'lamsinka89',
 222                 'description': 'RedGifs user lamsinka89, ordered by recent'
 223             },
 224             'playlist_mincount': 100,
 225         },
 226         {
 227             'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
 228             'info_dict': {
 229                 'id': 'lamsinka89?page=3',
 230                 'title': 'lamsinka89',
 231                 'description': 'RedGifs user lamsinka89, ordered by recent'
 232             },
 233             'playlist_count': 30,
 234         },
 235         {
 236             'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
 237             'info_dict': {
 238                 'id': 'lamsinka89?order=best&type=g',
 239                 'title': 'lamsinka89',
 240                 'description': 'RedGifs user lamsinka89, ordered by best'
 241             },
 242             'playlist_mincount': 100,
 243         }
 244     ]
 245
 246     def _real_extract(self, url):
 247         username, query_str = self._match_valid_url(url).group('username', 'query')
 248         playlist_id = f'{username}?{query_str}' if query_str else username
 249
 250         query = compat_parse_qs(query_str)
 251         order = query.get('order', ('recent',))[0]
 252
 253         entries = self._paged_entries(f'users/{username}/search', playlist_id, query, {
 254             'order': 'recent',
 255             'type': None,
 256         })
 257
 258         return self.playlist_result(
 259             entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')