[extractor/Scrolller] Add extractor (#4010)

author Abubukker Chaudhary <redacted>

Tue, 28 Jun 2022 12:10:43 +0000 (08:10 -0400)

committer GitHub <redacted>

Tue, 28 Jun 2022 12:10:43 +0000 (17:40 +0530)
author Abubukker Chaudhary <redacted>
Tue, 28 Jun 2022 12:10:43 +0000 (08:10 -0400)
committer GitHub <redacted>
Tue, 28 Jun 2022 12:10:43 +0000 (17:40 +0530)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py

index 37328dfc8406ceab618b3bf0a6aa682966644f33..dfac569de26c733210734e57c6cfc3a9b45574aa 100644 (file)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1511,6 +1511,7 @@
      SCTEIE,
      SCTECourseIE,
  )
+from .scrolller import ScrolllerIE
  from .seeker import SeekerIE
  from .senategov import SenateISVPIE, SenateGovIE
  from .sendtonews import SendtoNewsIE
diff --git a/yt_dlp/extractor/scrolller.py b/yt_dlp/extractor/scrolller.py

new file mode 100644 (file)

index 0000000..8469f48
--- /dev/null
+++ b/yt_dlp/extractor/scrolller.py
@@ -0,0 +1,104 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext, int_or_none
+
+
+class ScrolllerIE(InfoExtractor):  # Extractor for scrolller.com URLs matched by _VALID_URL below
+    _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'  # 'id' is the run of word chars/hyphens right after the host
+    _TESTS = [{  # Each entry pairs a URL with the info_dict fields expected for it
+        'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
+        'info_dict': {
+            'id': 'a-helping-hand-1k9pxikxkw',
+            'ext': 'mp4',
+            'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
+            'title': 'A helping hand',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
+        'info_dict': {
+            'id': 'tigers-chasing-a-drone-c5d1f2so6j',
+            'ext': 'mp4',
+            'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
+            'title': 'Tigers chasing a drone',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
+        'info_dict': {
+            'id': 'baby-rhino-smells-something-9chhugsv9p',
+            'ext': 'mp4',
+            'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
+            'title': 'Baby rhino smells something',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
+        'info_dict': {
+            'id': 'its-all-fun-and-games-cco8jjmoh7',
+            'ext': 'mp4',
+            'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
+            'title': 'It\'s all fun and games...',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
+        'info_dict': {
+            'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
+            'ext': 'mp4',
+            'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
+            'title': 'May the force be with you (Octokuro)',
+            'age_limit': 18,  # the only case here expecting the isNsfw branch of _real_extract
+        }
+    }]
+
+    def _real_extract(self, url):  # Returns an info dict with id, title, thumbnails, formats and age_limit
+        video_id = self._match_id(url)  # the 'id' group of _VALID_URL
+
+        query = {  # GraphQL request payload; the post path is substituted into the query text
+            'query': '''{
+                getSubredditPost(url:"/%s"){
+                    id
+                    title
+                    isNsfw
+                    mediaSources{
+                        url
+                        width
+                        height
+                    }
+                }
+            }''' % video_id  # video_id fills the url:"/%s" argument above
+        }
+
+        video_data = self._download_json(  # payload is sent as a UTF-8 encoded JSON body
+            'https://api.scrolller.com/api/v2/graphql', video_id, data=json.dumps(query).encode(),
+            headers={'Content-Type': 'application/json'})['data']['getSubredditPost']  # NOTE(review): hard subscripts; a response lacking these keys raises
+
+        formats, thumbnails = [], []  # every media source lands in at most one of these lists
+        for source in video_data['mediaSources']:  # NOTE(review): fails if the API returned no post (video_data not a dict) - confirm
+            if determine_ext(source.get('url')) in ('jpg', 'png'):  # jpg/png sources are kept as thumbnails
+                thumbnails.append({
+                    'url': source['url'],
+                    'width': int_or_none(source.get('width')),
+                    'height': int_or_none(source.get('height')),
+                })
+            elif source.get('url'):  # any other source with a URL is kept as a format; sources without a URL are skipped
+                formats.append({
+                    'url': source['url'],
+                    'width': int_or_none(source.get('width')),
+                    'height': int_or_none(source.get('height')),
+                })
+
+        if not formats:  # only images (or nothing) came back for this post
+            self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_data.get('title'),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'age_limit': 18 if video_data.get('isNsfw') else 0  # truthy isNsfw maps to 18, anything else to 0
+        }
author	Abubukker Chaudhary <redacted>
	Tue, 28 Jun 2022 12:10:43 +0000 (08:10 -0400)
committer	GitHub <redacted>
	Tue, 28 Jun 2022 12:10:43 +0000 (17:40 +0530)
yt_dlp/extractor/_extractors.py		patch \| blob \| blame \| history
yt_dlp/extractor/scrolller.py	[new file with mode: 0644]	patch \| blob