yt_dlp/extractor/redgifs.py

   1 # coding: utf-8
   2 import functools
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_parse_qs
   6 from ..utils import (
   7     ExtractorError,
   8     int_or_none,
   9     qualities,
  10     try_get,
  11     OnDemandPagedList,
  12 )
  13
  14
  15 class RedGifsBaseInfoExtractor(InfoExtractor):
  16     _FORMATS = {
  17         'gif': 250,
  18         'sd': 480,
  19         'hd': None,
  20     }
  21
  22     def _parse_gif_data(self, gif_data):
  23         video_id = gif_data.get('id')
  24         quality = qualities(tuple(self._FORMATS.keys()))
  25
  26         orig_height = int_or_none(gif_data.get('height'))
  27         aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])
  28
  29         formats = []
  30         for format_id, height in self._FORMATS.items():
  31             video_url = gif_data['urls'].get(format_id)
  32             if not video_url:
  33                 continue
  34             height = min(orig_height, height or orig_height)
  35             formats.append({
  36                 'url': video_url,
  37                 'format_id': format_id,
  38                 'width': height * aspect_ratio if aspect_ratio else None,
  39                 'height': height,
  40                 'quality': quality(format_id),
  41             })
  42         self._sort_formats(formats)
  43
  44         return {
  45             'id': video_id,
  46             'webpage_url': f'https://redgifs.com/watch/{video_id}',
  47             'ie_key': RedGifsIE.ie_key(),
  48             'extractor': 'RedGifs',
  49             'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
  50             'timestamp': int_or_none(gif_data.get('createDate')),
  51             'uploader': gif_data.get('userName'),
  52             'duration': int_or_none(gif_data.get('duration')),
  53             'view_count': int_or_none(gif_data.get('views')),
  54             'like_count': int_or_none(gif_data.get('likes')),
  55             'categories': gif_data.get('tags') or [],
  56             'tags': gif_data.get('tags'),
  57             'age_limit': 18,
  58             'formats': formats,
  59         }
  60
  61     def _call_api(self, ep, video_id, *args, **kwargs):
  62         data = self._download_json(
  63             f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs)
  64         if 'error' in data:
  65             raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
  66         return data
  67
  68     def _fetch_page(self, ep, video_id, query, page):
  69         query['page'] = page + 1
  70         data = self._call_api(
  71             ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
  72
  73         for entry in data['gifs']:
  74             yield self._parse_gif_data(entry)
  75
  76     def _prepare_api_query(self, query, fields):
  77         api_query = [
  78             (field_name, query.get(field_name, (default,))[0])
  79             for field_name, default in fields.items()]
  80
  81         return {key: val for key, val in api_query if val is not None}
  82
  83     def _paged_entries(self, ep, item_id, query, fields):
  84         page = int_or_none(query.get('page', (None,))[0])
  85         page_fetcher = functools.partial(
  86             self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
  87         return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
  88
  89
  90 class RedGifsIE(RedGifsBaseInfoExtractor):
  91     _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
  92     _TESTS = [{
  93         'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
  94         'info_dict': {
  95             'id': 'squeakyhelplesswisent',
  96             'ext': 'mp4',
  97             'title': 'Hotwife Legs Thick',
  98             'timestamp': 1636287915,
  99             'upload_date': '20211107',
 100             'uploader': 'ignored52',
 101             'duration': 16,
 102             'view_count': int,
 103             'like_count': int,
 104             'categories': list,
 105             'age_limit': 18,
 106         }
 107     }, {
 108         'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
 109         'info_dict': {
 110             'id': 'squeakyhelplesswisent',
 111             'ext': 'mp4',
 112             'title': 'Hotwife Legs Thick',
 113             'timestamp': 1636287915,
 114             'upload_date': '20211107',
 115             'uploader': 'ignored52',
 116             'duration': 16,
 117             'view_count': int,
 118             'like_count': int,
 119             'categories': list,
 120             'age_limit': 18,
 121         }
 122     }]
 123
 124     def _real_extract(self, url):
 125         video_id = self._match_id(url).lower()
 126         video_info = self._call_api(
 127             f'gifs/{video_id}', video_id, note='Downloading video info')
 128         return self._parse_gif_data(video_info['gif'])
 129
 130
 131 class RedGifsSearchIE(RedGifsBaseInfoExtractor):
 132     IE_DESC = 'Redgifs search'
 133     _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
 134     _PAGE_SIZE = 80
 135     _TESTS = [
 136         {
 137             'url': 'https://www.redgifs.com/browse?tags=Lesbian',
 138             'info_dict': {
 139                 'id': 'tags=Lesbian',
 140                 'title': 'Lesbian',
 141                 'description': 'RedGifs search for Lesbian, ordered by trending'
 142             },
 143             'playlist_mincount': 100,
 144         },
 145         {
 146             'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
 147             'info_dict': {
 148                 'id': 'type=g&order=latest&tags=Lesbian',
 149                 'title': 'Lesbian',
 150                 'description': 'RedGifs search for Lesbian, ordered by latest'
 151             },
 152             'playlist_mincount': 100,
 153         },
 154         {
 155             'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
 156             'info_dict': {
 157                 'id': 'type=g&order=latest&tags=Lesbian&page=2',
 158                 'title': 'Lesbian',
 159                 'description': 'RedGifs search for Lesbian, ordered by latest'
 160             },
 161             'playlist_count': 80,
 162         }
 163     ]
 164
 165     def _real_extract(self, url):
 166         query_str = self._match_valid_url(url).group('query')
 167         query = compat_parse_qs(query_str)
 168         if not query.get('tags'):
 169             raise ExtractorError('Invalid query tags', expected=True)
 170
 171         tags = query.get('tags')[0]
 172         order = query.get('order', ('trending',))[0]
 173
 174         query['search_text'] = [tags]
 175         entries = self._paged_entries('gifs/search', query_str, query, {
 176             'search_text': None,
 177             'order': 'trending',
 178             'type': None,
 179         })
 180
 181         return self.playlist_result(
 182             entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}')
 183
 184
 185 class RedGifsUserIE(RedGifsBaseInfoExtractor):
 186     IE_DESC = 'Redgifs user'
 187     _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
 188     _PAGE_SIZE = 30
 189     _TESTS = [
 190         {
 191             'url': 'https://www.redgifs.com/users/lamsinka89',
 192             'info_dict': {
 193                 'id': 'lamsinka89',
 194                 'title': 'lamsinka89',
 195                 'description': 'RedGifs user lamsinka89, ordered by recent'
 196             },
 197             'playlist_mincount': 100,
 198         },
 199         {
 200             'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
 201             'info_dict': {
 202                 'id': 'lamsinka89?page=3',
 203                 'title': 'lamsinka89',
 204                 'description': 'RedGifs user lamsinka89, ordered by recent'
 205             },
 206             'playlist_count': 30,
 207         },
 208         {
 209             'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
 210             'info_dict': {
 211                 'id': 'lamsinka89?order=best&type=g',
 212                 'title': 'lamsinka89',
 213                 'description': 'RedGifs user lamsinka89, ordered by best'
 214             },
 215             'playlist_mincount': 100,
 216         }
 217     ]
 218
 219     def _real_extract(self, url):
 220         username, query_str = self._match_valid_url(url).group('username', 'query')
 221         playlist_id = f'{username}?{query_str}' if query_str else username
 222
 223         query = compat_parse_qs(query_str)
 224         order = query.get('order', ('recent',))[0]
 225
 226         entries = self._paged_entries(f'users/{username}/search', playlist_id, query, {
 227             'order': 'recent',
 228             'type': None,
 229         })
 230
 231         return self.playlist_result(
 232             entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')