]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/scrolller.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / scrolller.py
1 import json
2
3 from .common import InfoExtractor
4 from ..utils import determine_ext, int_or_none
5
6
class ScrolllerIE(InfoExtractor):
    """Extractor for single post pages on scrolller.com.

    The post slug from the URL is used as the video id and is looked up
    through the site's GraphQL endpoint.
    """

    _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
        'info_dict': {
            'id': 'a-helping-hand-1k9pxikxkw',
            'ext': 'mp4',
            'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
            'title': 'A helping hand',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
        'info_dict': {
            'id': 'tigers-chasing-a-drone-c5d1f2so6j',
            'ext': 'mp4',
            'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
            'title': 'Tigers chasing a drone',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
        'info_dict': {
            'id': 'baby-rhino-smells-something-9chhugsv9p',
            'ext': 'mp4',
            'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
            'title': 'Baby rhino smells something',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
        'info_dict': {
            'id': 'its-all-fun-and-games-cco8jjmoh7',
            'ext': 'mp4',
            'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
            'title': 'It\'s all fun and games...',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
        'info_dict': {
            'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
            'ext': 'mp4',
            'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
            'title': 'May the force be with you (Octokuro)',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the post addressed by *url*.

        Media sources whose URL extension is ``jpg`` or ``png`` are returned
        as thumbnails; every other source with a URL is returned as a format.
        ``age_limit`` is 18 when the post is flagged ``isNsfw``, otherwise 0.

        If the API response carries no usable media sources (including a
        null/absent post object), ``raise_no_formats`` is invoked with
        ``expected=True`` instead of failing on a missing key.
        """
        video_id = self._match_id(url)

        # video_id can only contain [\w-] (see _VALID_URL), so interpolating
        # it into the quoted GraphQL string cannot break out of the literal.
        query = {
            'query': '''{
                getSubredditPost(url:"/%s"){
                    id
                    title
                    isNsfw
                    mediaSources{
                        url
                        width
                        height
                    }
                }
            }''' % video_id
        }

        response = self._download_json(
            'https://api.scrolller.com/api/v2/graphql', video_id, data=json.dumps(query).encode(),
            headers={'Content-Type': 'application/json'})

        # Tolerate a null/absent `data` or `getSubredditPost` (presumably what
        # the API sends for an unknown post - not verified) so that we fall
        # through to the "no video" error below rather than a raw
        # KeyError/TypeError. Assumes the response itself is a JSON object.
        video_data = (response.get('data') or {}).get('getSubredditPost') or {}

        formats, thumbnails = [], []
        for source in video_data.get('mediaSources') or []:
            source_url = source.get('url')
            if not source_url:
                # Nothing downloadable without a URL
                continue
            entry = {
                'url': source_url,
                'width': int_or_none(source.get('width')),
                'height': int_or_none(source.get('height')),
            }
            # Still images are thumbnails; anything else is treated as video
            if determine_ext(source_url) in ('jpg', 'png'):
                thumbnails.append(entry)
            else:
                formats.append(entry)

        if not formats:
            self.raise_no_formats('There is no video.', expected=True, video_id=video_id)

        return {
            'id': video_id,
            'title': video_data.get('title'),
            'thumbnails': thumbnails,
            'formats': formats,
            'age_limit': 18 if video_data.get('isNsfw') else 0,
        }