jfr.im git - yt-dlp.git/commitdiff
[extractor/Scrolller] Add extractor (#4010)
author: Abubukker Chaudhary <redacted>
Tue, 28 Jun 2022 12:10:43 +0000 (08:10 -0400)
committer: GitHub <redacted>
Tue, 28 Jun 2022 12:10:43 +0000 (17:40 +0530)
Closes #3635
Authored by: LunarFang416

yt_dlp/extractor/_extractors.py
yt_dlp/extractor/scrolller.py [new file with mode: 0644]

index 37328dfc8406ceab618b3bf0a6aa682966644f33..dfac569de26c733210734e57c6cfc3a9b45574aa 100644 (file)
     SCTEIE,
     SCTECourseIE,
 )
     SCTEIE,
     SCTECourseIE,
 )
+from .scrolller import ScrolllerIE
 from .seeker import SeekerIE
 from .senategov import SenateISVPIE, SenateGovIE
 from .sendtonews import SendtoNewsIE
 from .seeker import SeekerIE
 from .senategov import SenateISVPIE, SenateGovIE
 from .sendtonews import SendtoNewsIE
diff --git a/yt_dlp/extractor/scrolller.py b/yt_dlp/extractor/scrolller.py
new file mode 100644 (file)
index 0000000..8469f48
--- /dev/null
@@ -0,0 +1,104 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext, int_or_none
+
+
+class ScrolllerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
+        'info_dict': {
+            'id': 'a-helping-hand-1k9pxikxkw',
+            'ext': 'mp4',
+            'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
+            'title': 'A helping hand',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
+        'info_dict': {
+            'id': 'tigers-chasing-a-drone-c5d1f2so6j',
+            'ext': 'mp4',
+            'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
+            'title': 'Tigers chasing a drone',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
+        'info_dict': {
+            'id': 'baby-rhino-smells-something-9chhugsv9p',
+            'ext': 'mp4',
+            'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
+            'title': 'Baby rhino smells something',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
+        'info_dict': {
+            'id': 'its-all-fun-and-games-cco8jjmoh7',
+            'ext': 'mp4',
+            'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
+            'title': 'It\'s all fun and games...',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
+        'info_dict': {
+            'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
+            'ext': 'mp4',
+            'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
+            'title': 'May the force be with you (Octokuro)',
+            'age_limit': 18,
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        query = {
+            'query': '''{
+                getSubredditPost(url:"/%s"){
+                    id
+                    title
+                    isNsfw
+                    mediaSources{
+                        url
+                        width
+                        height
+                    }
+                }
+            }''' % video_id
+        }
+
+        video_data = self._download_json(
+            'https://api.scrolller.com/api/v2/graphql', video_id, data=json.dumps(query).encode(),
+            headers={'Content-Type': 'application/json'})['data']['getSubredditPost']
+
+        formats, thumbnails = [], []
+        for source in video_data['mediaSources']:
+            if determine_ext(source.get('url')) in ('jpg', 'png'):
+                thumbnails.append({
+                    'url': source['url'],
+                    'width': int_or_none(source.get('width')),
+                    'height': int_or_none(source.get('height')),
+                })
+            elif source.get('url'):
+                formats.append({
+                    'url': source['url'],
+                    'width': int_or_none(source.get('width')),
+                    'height': int_or_none(source.get('height')),
+                })
+
+        if not formats:
+            self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_data.get('title'),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'age_limit': 18 if video_data.get('isNsfw') else 0
+        }