yt_dlp/extractor/scrolller.py

   1 import json
   2
   3 from .common import InfoExtractor
   4 from ..utils import determine_ext, int_or_none
   5
   6
   7 class ScrolllerIE(InfoExtractor):
   8     _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'
   9     _TESTS = [{
  10         'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
  11         'info_dict': {
  12             'id': 'a-helping-hand-1k9pxikxkw',
  13             'ext': 'mp4',
  14             'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
  15             'title': 'A helping hand',
  16             'age_limit': 0,
  17         }
  18     }, {
  19         'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
  20         'info_dict': {
  21             'id': 'tigers-chasing-a-drone-c5d1f2so6j',
  22             'ext': 'mp4',
  23             'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
  24             'title': 'Tigers chasing a drone',
  25             'age_limit': 0,
  26         }
  27     }, {
  28         'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
  29         'info_dict': {
  30             'id': 'baby-rhino-smells-something-9chhugsv9p',
  31             'ext': 'mp4',
  32             'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
  33             'title': 'Baby rhino smells something',
  34             'age_limit': 0,
  35         }
  36     }, {
  37         'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
  38         'info_dict': {
  39             'id': 'its-all-fun-and-games-cco8jjmoh7',
  40             'ext': 'mp4',
  41             'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
  42             'title': 'It\'s all fun and games...',
  43             'age_limit': 0,
  44         }
  45     }, {
  46         'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
  47         'info_dict': {
  48             'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
  49             'ext': 'mp4',
  50             'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
  51             'title': 'May the force be with you (Octokuro)',
  52             'age_limit': 18,
  53         }
  54     }]
  55
  56     def _real_extract(self, url):
  57         video_id = self._match_id(url)
  58
  59         query = {
  60             'query': '''{
  61                 getSubredditPost(url:"/%s"){
  62                     id
  63                     title
  64                     isNsfw
  65                     mediaSources{
  66                         url
  67                         width
  68                         height
  69                     }
  70                 }
  71             }''' % video_id
  72         }
  73
  74         video_data = self._download_json(
  75             'https://api.scrolller.com/api/v2/graphql', video_id, data=json.dumps(query).encode(),
  76             headers={'Content-Type': 'application/json'})['data']['getSubredditPost']
  77
  78         formats, thumbnails = [], []
  79         for source in video_data['mediaSources']:
  80             if determine_ext(source.get('url')) in ('jpg', 'png'):
  81                 thumbnails.append({
  82                     'url': source['url'],
  83                     'width': int_or_none(source.get('width')),
  84                     'height': int_or_none(source.get('height')),
  85                 })
  86             elif source.get('url'):
  87                 formats.append({
  88                     'url': source['url'],
  89                     'width': int_or_none(source.get('width')),
  90                     'height': int_or_none(source.get('height')),
  91                 })
  92
  93         if not formats:
  94             self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
  95
  96         return {
  97             'id': video_id,
  98             'title': video_data.get('title'),
  99             'thumbnails': thumbnails,
 100             'formats': formats,
 101             'age_limit': 18 if video_data.get('isNsfw') else 0
 102         }