]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/minds.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / yt_dlp / extractor / minds.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import (
7 clean_html,
8 int_or_none,
9 str_or_none,
10 strip_or_none,
11 )
12
13
class MindsBaseIE(InfoExtractor):
    """Common base for the Minds extractors: URL prefix and API helper."""

    # Regex prefix shared by the _VALID_URL patterns of the subclasses.
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Download and return JSON from the Minds API endpoint at *path*.

        *path* is appended to ``https://www.minds.com/api/``.  *video_id*
        and *resource* are only used for the download note, and *query*
        is forwarded unchanged to ``_download_json``.
        """
        endpoint = 'https://www.minds.com/api/' + path
        # Echo the XSRF-TOKEN cookie (when one is stored for this URL) back
        # in the X-XSRF-TOKEN header; send an empty value otherwise.
        xsrf_cookie = self._get_cookies(endpoint).get('XSRF-TOKEN')
        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_cookie.value if xsrf_cookie else '',
        }
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(
            endpoint, video_id, note, headers=request_headers, query=query)
25
26
class MindsIE(MindsBaseIE):
    """Extractor for a single Minds video.

    Matches ``media``, ``newsfeed`` and ``archive/view`` URLs that end in a
    numeric entity id.
    """
    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the entity behind *url* and return its info dict.

        An ``activity`` entity whose ``custom_type`` is ``video`` is
        resolved through its ``entity_guid``; any other activity is handed
        off to its ``perma_url`` via ``url_result``.  Every other entity
        must have ``subtype == 'video'``.

        Raises:
            ExtractorError: if a non-activity entity is not a video.
        """
        # Function-scope import: keeps this method self-contained without
        # touching the module-level import list.
        from ..utils import ExtractorError

        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
        if entity.get('type') == 'activity':
            if entity.get('custom_type') == 'video':
                video_id = entity['entity_guid']
            else:
                return self.url_result(entity['perma_url'])
        else:
            # Explicit check rather than `assert`: assertions are removed
            # when Python runs with -O, which would let non-video entities
            # fall through to the video API call below.
            if entity.get('subtype') != 'video':
                raise ExtractorError(
                    'Entity %s is not a video' % entity_id, expected=True)
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        formats = []
        for source in (video.get('sources') or []):
            src = source.get('src')
            if not src:
                # A source without a URL cannot be downloaded; skip it.
                continue
            formats.append({
                'format_id': source.get('label'),
                'height': int_or_none(source.get('size')),
                'url': src,
            })
        self._sort_formats(formats)

        # Prefer the entity embedded in the video response; fall back to
        # the one fetched above.
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')

        # A single tag given as a plain string is wrapped into a list.
        tags = entity.get('tags')
        if tags and isinstance(tags, compat_str):
            tags = [tags]

        # The thumbnail is the final URL reported after requesting the
        # poster (presumably to follow redirects -- confirm); a failed
        # request is non-fatal and leaves the thumbnail unset.
        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        if poster:
            urlh = self._request_webpage(poster, video_id, fatal=False)
            if urlh:
                thumbnail = urlh.geturl()

        return {
            'id': video_id,
            # Fall back to the id when the entity has no (or an empty) title.
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
131
132
class MindsFeedBaseIE(MindsBaseIE):
    """Shared playlist logic for Minds feed extractors.

    ``_real_extract`` reads ``self._FEED_PATH`` and ``self._FEED_TYPE``,
    which are not defined here; concrete subclasses must provide them.
    """

    # Number of entities requested per page; a page shorter than this is
    # treated as the last one.
    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield a ``url_result`` for every video entity of feed *feed_id*.

        Pages are fetched until the response carries no ``load-next``
        cursor or contains a number of entities other than ``_PAGE_SIZE``.
        """
        query = {'limit': self._PAGE_SIZE, 'sync': 1}
        page_num = 1
        while True:
            data = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % page_num, query)
            entities = data.get('entities') or []
            for entity in entities:
                guid = entity.get('guid')
                if not guid:
                    # Without a guid there is no URL to hand to MindsIE.
                    continue
                yield self.url_result(
                    'https://www.minds.com/newsfeed/' + guid,
                    MindsIE.ie_key(), guid)
            # Read the cursor defensively, in line with the handling of
            # 'entities' above: a response without 'load-next' ends the
            # pagination instead of raising KeyError.
            next_cursor = data.get('load-next')
            if not next_cursor or len(entities) != self._PAGE_SIZE:
                break
            query['from_timestamp'] = next_cursor
            page_num += 1

    def _real_extract(self, url):
        """Fetch the feed's metadata and return it as a playlist result."""
        feed_id = self._match_id(url)
        feed = self._call_api(
            'v1/%s/%s' % (self._FEED_PATH, feed_id),
            feed_id, self._FEED_TYPE)[self._FEED_TYPE]

        # The video feed is requested by the feed's guid, while the
        # playlist keeps the id taken from the URL.
        return self.playlist_result(
            self._entries(feed['guid']), feed_id,
            strip_or_none(feed.get('name')),
            feed.get('briefdescription'))
166
167
class MindsChannelIE(MindsFeedBaseIE):
    """Feed extractor for Minds channel URLs (``minds.com/<channel>``)."""

    # Feed configuration read by MindsFeedBaseIE._real_extract.
    _FEED_TYPE = 'channel'
    _FEED_PATH = 'channel'

    IE_NAME = 'minds:%s' % _FEED_TYPE

    # The negative lookahead keeps the first path segments newsfeed, media,
    # api, archive and groups from being matched as channel names.
    _VALID_URL = (
        MindsBaseIE._VALID_URL_BASE
        + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)')

    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
182
183
class MindsGroupIE(MindsFeedBaseIE):
    """Feed extractor for Minds group URLs (``groups/profile/<id>``)."""

    # Feed configuration read by MindsFeedBaseIE._real_extract.
    _FEED_TYPE = 'group'
    _FEED_PATH = 'groups/group'

    IE_NAME = 'minds:%s' % _FEED_TYPE

    # Only the numeric group id is captured.
    _VALID_URL = (
        MindsBaseIE._VALID_URL_BASE
        + r'groups/profile/(?P<id>[0-9]+)')

    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }