]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/murrtube.py
[extractor/generic] Decode unicode-escaped embed URLs (#5919)
[yt-dlp.git] / yt_dlp / extractor / murrtube.py
1 import functools
2 import json
3
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 OnDemandPagedList,
8 determine_ext,
9 int_or_none,
10 try_get,
11 )
12
13
class MurrtubeIE(InfoExtractor):
    """Extractor for a single murrtube.net video.

    Matches public video page URLs as well as the ``murrtube:<uuid>`` form,
    and reads the video's metadata from the site's GraphQL endpoint.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        murrtube:|
                        https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+)\-
                    )
                    (?P<id>[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12})
                    '''
    _TEST = {
        'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
        'md5': '169f494812d9a90914b42978e73aa690',
        'info_dict': {
            'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
            'ext': 'mp4',
            'title': 'Inferno X Skyler',
            'description': 'Humping a very good slutty sheppy (roomate)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 284,
            'uploader': 'Inferno Wolf',
            'age_limit': 18,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
        }
    }

    # Base URL that the ``key`` / ``thumbnailKey`` values are appended to.
    _STORAGE_URL = 'https://storage.murrtube.net/murrtube/'

    def _download_gql(self, video_id, op, note=None, fatal=True):
        """POST a GraphQL operation and return the ``data`` member of the reply.

        :param video_id: identifier passed through to ``_download_json``.
        :param op: JSON-serialisable dict describing the GraphQL operation.
        :param note: optional progress note passed to ``_download_json``.
        :param fatal: passed through to ``_download_json``.
        :return: the value stored under ``data`` in the response, or ``None``
            when the response is not a dict or carries no ``data`` member.
        """
        result = self._download_json(
            'https://murrtube.net/graphql',
            video_id, note, data=json.dumps(op).encode(), fatal=fatal,
            headers={'Content-Type': 'application/json'})
        # NOTE(review): a non-fatal failed request presumably does not yield a
        # dict (confirm against InfoExtractor._download_json); guard the lookup
        # so callers get None instead of a TypeError/KeyError.
        if not isinstance(result, dict):
            return None
        return result.get('data')

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        :raises ExtractorError: if the API returns no medium for the id, or
            the medium has no storage key to build a media URL from.
        """
        video_id = self._match_id(url)
        data = self._download_gql(video_id, {
            'operationName': 'Medium',
            'variables': {
                'id': video_id,
            },
            'query': '''\
query Medium($id: ID!) {
  medium(id: $id) {
    title
    description
    key
    duration
    commentsCount
    likesCount
    viewsCount
    thumbnailKey
    tagList
    user {
      name
      __typename
    }
    __typename
  }
}'''})
        # ``data`` or ``medium`` may be null; fail with a clear message rather
        # than an AttributeError on ``None.get``.
        meta = (data or {}).get('medium')
        if not meta:
            raise ExtractorError(f'Unable to retrieve metadata for video {video_id}')

        # Without a key the concatenation would either raise (explicit null)
        # or point at the bare storage root, which is not a media file.
        media_key = meta.get('key')
        if not media_key:
            raise ExtractorError(f'No media key found for video {video_id}')
        format_url = self._STORAGE_URL + media_key

        # Only advertise a thumbnail when the API actually names one.
        thumbnail_key = meta.get('thumbnailKey')
        thumbnail = self._STORAGE_URL + thumbnail_key if thumbnail_key else None

        if determine_ext(format_url) == 'm3u8':
            formats = self._extract_m3u8_formats(
                format_url, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)
        else:
            formats = [{'url': format_url}]

        return {
            'id': video_id,
            'title': meta.get('title'),
            'description': meta.get('description'),
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': int_or_none(meta.get('duration')),
            'uploader': try_get(meta, lambda x: x['user']['name']),
            # Normalise the counters the same way as ``duration``.
            'view_count': int_or_none(meta.get('viewsCount')),
            'like_count': int_or_none(meta.get('likesCount')),
            'comment_count': int_or_none(meta.get('commentsCount')),
            'tags': meta.get('tagList'),
            'age_limit': 18,
        }
100
101
class MurrtubeUserIE(MurrtubeIE):  # XXX: Do not subclass from concrete IE
    """Extractor for a murrtube.net user profile, returned as a playlist.

    Looks up the user's id through the GraphQL endpoint, then pages through
    that user's media and emits one ``murrtube:<id>`` entry per item.
    """

    IE_DESC = 'Murrtube user profile'
    _VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
    _TEST = {
        'url': 'https://murrtube.net/stormy',
        'info_dict': {
            'id': 'stormy',
        },
        'playlist_mincount': 27,
    }
    _PAGE_SIZE = 10

    def _fetch_page(self, username, user_id, page):
        """Yield url results for one page of the user's media.

        :param username: profile name, used as the download identifier.
        :param user_id: id sent as the ``userId`` query variable.
        :param page: zero-based page index.
        :raises ExtractorError: if the API returns no data for the page.
        """
        data = self._download_gql(username, {
            'operationName': 'Media',
            'variables': {
                'limit': self._PAGE_SIZE,
                'offset': page * self._PAGE_SIZE,
                'sort': 'latest',
                'userId': user_id,
            },
            'query': '''\
query Media($q: String, $sort: String, $userId: ID, $offset: Int!, $limit: Int!) {
  media(q: $q, sort: $sort, userId: $userId, offset: $offset, limit: $limit) {
    id
    __typename
  }
}'''},
            f'Downloading page {page + 1}')
        if data is None:
            raise ExtractorError(f'Failed to retrieve video list for page {page + 1}')

        # A null/missing ``media`` list is treated as an empty page instead of
        # raising TypeError while iterating.
        for entry in data.get('media') or []:
            media_id = (entry or {}).get('id')
            if not media_id:
                # Nothing to build a ``murrtube:`` URL from; skip the entry.
                continue
            yield self.url_result(f'murrtube:{media_id}', MurrtubeIE.ie_key())

    def _real_extract(self, url):
        """Return a lazily paged playlist of the profile's videos.

        :raises ExtractorError: if user info cannot be fetched, or the API
            reports no user (or no user id) for the requested name.
        """
        username = self._match_id(url)
        data = self._download_gql(username, {
            'operationName': 'User',
            'variables': {
                'id': username,
            },
            'query': '''\
query User($id: ID!) {
  user(id: $id) {
    id
    __typename
  }
}'''},
            'Downloading user info')
        if data is None:
            raise ExtractorError('Failed to fetch user info')

        # ``user`` may be null for unknown profiles. Without an id the media
        # query would be sent with ``userId: null``, i.e. without a user filter.
        user_id = (data.get('user') or {}).get('id')
        if not user_id:
            raise ExtractorError(f'Unable to find user {username}')

        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, username, user_id),
            self._PAGE_SIZE)

        return self.playlist_result(entries, username)
160 self._fetch_page, username, user.get('id')), self._PAGE_SIZE)
161
162 return self.playlist_result(entries, username)