]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/chingari.py
Tolerate failure to `--write-link` due to unknown URL
[yt-dlp.git] / yt_dlp / extractor / chingari.py
CommitLineData
d1a77684
AG
1# coding: utf-8
2from __future__ import unicode_literals
3
4import itertools
5import json
6
7from .common import InfoExtractor
8from ..compat import compat_urllib_parse_unquote_plus
9from ..utils import (
10 clean_html,
11 ExtractorError,
12 int_or_none,
13 str_to_int,
14 url_or_none,
15)
16
17
class ChingariBaseIE(InfoExtractor):
    """Shared post-parsing logic for the Chingari extractors."""

    def _get_post(self, id, post_data):
        """Build an info dict for a single post.

        @param id         Value stored verbatim under the result's 'id' key.
        @param post_data  Post object from the API JSON. It must contain
                          'mediaLocation' with a 'base' entry; every other
                          field is treated as optional.
        @returns          Info dict with formats and metadata of the post.
        @raises KeyError  If 'mediaLocation' or its 'base' entry is missing.
        """
        media_data = post_data['mediaLocation']
        base_url = media_data['base']
        # `or {}` instead of a `.get` default: the default only covers a
        # missing key, not a key that is present with a null value.
        author_data = post_data.get('authorData') or {}
        # Revisit this in future for differentiating b/w 'art' and 'author'
        song_data = post_data.get('song') or {}

        # NOTE(review): the width is parsed from the format key minus its
        # first character - presumably keys are a letter followed by the
        # width; confirm against a live API response.
        formats = [{
            'format_id': frmt,
            'width': str_to_int(frmt[1:]),
            'url': base_url + frmt_path,
        } for frmt, frmt_path in (media_data.get('transcoded') or {}).items() if frmt_path]

        if media_data.get('path'):
            formats.append({
                'format_id': 'original',
                'format_note': 'Direct video.',
                'url': base_url + '/apipublic' + media_data['path'],
                'quality': 10,
            })
        self._sort_formats(formats)

        # 'created_at' is divided by 1000 before being reported as the
        # timestamp. int_or_none tolerates None, so no separate guard is needed.
        timestamp = int_or_none(str_to_int(post_data.get('created_at')), scale=1000)

        thumbnail_path = media_data.get('thumbnail')
        thumbnail = base_url + thumbnail_path if thumbnail_path else None
        username = author_data.get('username')
        uploader_url = 'https://chingari.io/' + username if username else None

        # Decode the caption once and only when present; the URL-unquoting
        # helper cannot be handed None.
        caption = post_data.get('caption')
        if caption is not None:
            caption = compat_urllib_parse_unquote_plus(clean_html(caption))

        return {
            'id': id,
            'title': caption,
            'description': caption,
            'duration': media_data.get('duration'),
            'thumbnail': url_or_none(thumbnail),
            'like_count': post_data.get('likeCount'),
            'view_count': post_data.get('viewsCount'),
            'comment_count': post_data.get('commentCount'),
            'repost_count': post_data.get('shareCount'),
            'timestamp': timestamp,
            'uploader_id': post_data.get('userId') or author_data.get('_id'),
            'uploader': author_data.get('name'),
            'uploader_url': url_or_none(uploader_url),
            'track': song_data.get('title'),
            'artist': song_data.get('author'),
            'formats': formats,
        }
67
68
class ChingariIE(ChingariBaseIE):
    """Extractor for a single Chingari post shared via a /share/post URL."""

    _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
    _TESTS = [{
        'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
        'info_dict': {
            'id': '612f8f4ce1dc57090e8a7beb',
            'ext': 'mp4',
            'title': 'Happy birthday Srila Prabhupada',
            'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
            'duration': 0,
            'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'repost_count': int,
            'timestamp': 1630506828,
            'upload_date': '20210901',
            'uploader_id': '5f0403982c8bd344f4813f8c',
            'uploader': 'ISKCON,Inc.',
            'uploader_url': 'https://chingari.io/iskcon,inc',
            'track': None,
            'artist': None,
        },
        'params': {'skip_download': True}
    }]

    def _real_extract(self, url):
        """Fetch the post details for the id in `url` and return its info dict.

        Raises ExtractorError (marked as expected) carrying the API's message
        when the response 'code' is anything other than 200.
        """
        video_id = self._match_id(url)
        response = self._download_json(
            f'https://api.chingari.io/post/post_details/{video_id}', video_id)

        # The status is read from the JSON body's 'code' field.
        if response['code'] == 200:
            return self._get_post(video_id, response['data'])
        raise ExtractorError(response['message'], expected=True)
102
103
class ChingariUserIE(ChingariBaseIE):
    """Extractor for all posts of a Chingari user, returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'https://chingari.io/dada1023',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'dada1023',
        },
        'entries': [{
            'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
            'info_dict': {
                'id': '614781f3ade60b3a0bfff42a',
                'ext': 'mp4',
                'title': '#chingaribappa ',
                'description': 'md5:d1df21d84088770468fa63afe3b17857',
                'duration': 7,
                'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1632076275,
                'upload_date': '20210919',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None
            },
            'params': {'skip_download': True}
        }, {
            'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
            'info_dict': {
                'id': '6146b132bcbf860959e12cba',
                'ext': 'mp4',
                'title': 'Tactor harvesting',
                'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
                'duration': 59.3,
                'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1632022834,
                'upload_date': '20210919',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None
            },
            'params': {'skip_download': True}
        }, {
            'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
            'info_dict': {
                'id': '6145651b74cb030a64c40b82',
                'ext': 'mp4',
                'title': '#odiabhajan ',
                'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
                'duration': 56.67,
                'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
                'like_count': int,
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
                'timestamp': 1631937819,
                'upload_date': '20210918',
                'uploader_id': '5efc4b12cca35c3d1794c2d3',
                'uploader': 'dada (girish) dhawale',
                'uploader_url': 'https://chingari.io/dada1023',
                'track': None,
                'artist': None
            },
            'params': {'skip_download': True}
        }],
    }, {
        'url': 'https://chingari.io/iskcon%2Cinc',
        'playlist_mincount': 1025,
        'info_dict': {
            'id': 'iskcon%2Cinc',
        },
    }]

    # Number of posts requested per page; also the step of the 'skip' offset.
    _PAGE_SIZE = 20

    def _entries(self, id):
        """Lazily yield an info dict for every post of the user.

        @param id  Internal user id, sent as both 'userId' and 'ownerId'.

        Pages are requested until the API stops reporting 'hasMoreData' or
        returns a page without posts.
        """
        for page in itertools.count():
            posts = self._download_json(
                'https://api.chingari.io/users/getPosts', id,
                data=json.dumps({
                    'userId': id,
                    'ownerId': id,
                    'skip': page * self._PAGE_SIZE,
                    'limit': self._PAGE_SIZE,
                }).encode(),
                headers={'content-type': 'application/json;charset=UTF-8'},
                note='Downloading page %s' % page)

            # `or []` also covers a 'data' key that is present but null.
            page_posts = posts.get('data') or []
            for post in page_posts:
                post_data = post['post']
                yield self._get_post(post_data['_id'], post_data)

            # Stop on an empty page as well: otherwise a response that keeps
            # claiming more data without delivering any would loop forever.
            if not page_posts or not posts.get('hasMoreData'):
                break

    def _real_extract(self, url):
        """Resolve the user named in `url` and return their posts as a playlist.

        The URL path component is kept as the playlist id, while the '_id'
        returned by the user endpoint is what the posts are queried with.
        Raises ExtractorError (marked as expected) carrying the API's message
        when the response 'code' is anything other than 200.
        """
        alt_id = self._match_id(url)
        post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
        if post_json['code'] != 200:
            raise ExtractorError(post_json['message'], expected=True)
        id = post_json['data']['_id']
        return self.playlist_result(self._entries(id), playlist_id=alt_id)
200 if not has_more:
201 break
202
203 def _real_extract(self, url):
204 alt_id = self._match_id(url)
205 post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
206 if post_json['code'] != 200:
207 raise ExtractorError(post_json['message'], expected=True)
208 id = post_json['data']['_id']
209 return self.playlist_result(self._entries(id), playlist_id=alt_id)