yt_dlp/extractor/chingari.py

   1 import itertools
   2 import json
   3 import urllib.parse
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     ExtractorError,
   8     clean_html,
   9     int_or_none,
  10     str_to_int,
  11     url_or_none,
  12 )
  13
  14
  15 class ChingariBaseIE(InfoExtractor):
  16     def _get_post(self, id, post_data):
  17         media_data = post_data['mediaLocation']
  18         base_url = media_data['base']
  19         author_data = post_data.get('authorData', {})
  20         song_data = post_data.get('song', {})  # revist this in future for differentiating b/w 'art' and 'author'
  21
  22         formats = [{
  23             'format_id': frmt,
  24             'width': str_to_int(frmt[1:]),
  25             'url': base_url + frmt_path,
  26         } for frmt, frmt_path in media_data.get('transcoded', {}).items()]
  27
  28         if media_data.get('path'):
  29             formats.append({
  30                 'format_id': 'original',
  31                 'format_note': 'Direct video.',
  32                 'url': base_url + '/apipublic' + media_data['path'],
  33                 'quality': 10,
  34             })
  35         self._sort_formats(formats)
  36         timestamp = str_to_int(post_data.get('created_at'))
  37         if timestamp:
  38             timestamp = int_or_none(timestamp, 1000)
  39
  40         thumbnail, uploader_url = None, None
  41         if media_data.get('thumbnail'):
  42             thumbnail = base_url + media_data.get('thumbnail')
  43         if author_data.get('username'):
  44             uploader_url = 'https://chingari.io/' + author_data.get('username')
  45
  46         return {
  47             'id': id,
  48             'extractor_key': ChingariIE.ie_key(),
  49             'extractor': 'Chingari',
  50             'title': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
  51             'description': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
  52             'duration': media_data.get('duration'),
  53             'thumbnail': url_or_none(thumbnail),
  54             'like_count': post_data.get('likeCount'),
  55             'view_count': post_data.get('viewsCount'),
  56             'comment_count': post_data.get('commentCount'),
  57             'repost_count': post_data.get('shareCount'),
  58             'timestamp': timestamp,
  59             'uploader_id': post_data.get('userId') or author_data.get('_id'),
  60             'uploader': author_data.get('name'),
  61             'uploader_url': url_or_none(uploader_url),
  62             'track': song_data.get('title'),
  63             'artist': song_data.get('author'),
  64             'formats': formats,
  65         }
  66
  67
  68 class ChingariIE(ChingariBaseIE):
  69     _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
  70     _TESTS = [{
  71         'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
  72         'info_dict': {
  73             'id': '612f8f4ce1dc57090e8a7beb',
  74             'ext': 'mp4',
  75             'title': 'Happy birthday Srila Prabhupada',
  76             'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
  77             'duration': 0,
  78             'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
  79             'like_count': int,
  80             'view_count': int,
  81             'comment_count': int,
  82             'repost_count': int,
  83             'timestamp': 1630506828,
  84             'upload_date': '20210901',
  85             'uploader_id': '5f0403982c8bd344f4813f8c',
  86             'uploader': 'ISKCON,Inc.',
  87             'uploader_url': 'https://chingari.io/iskcon,inc',
  88             'track': None,
  89             'artist': None,
  90         },
  91         'params': {'skip_download': True}
  92     }]
  93
  94     def _real_extract(self, url):
  95         id = self._match_id(url)
  96         post_json = self._download_json(f'https://api.chingari.io/post/post_details/{id}', id)
  97         if post_json['code'] != 200:
  98             raise ExtractorError(post_json['message'], expected=True)
  99         post_data = post_json['data']
 100         return self._get_post(id, post_data)
 101
 102
 103 class ChingariUserIE(ChingariBaseIE):
 104     _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
 105     _TESTS = [{
 106         'url': 'https://chingari.io/dada1023',
 107         'info_dict': {
 108             'id': 'dada1023',
 109         },
 110         'params': {'playlistend': 3},
 111         'playlist': [{
 112             'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
 113             'info_dict': {
 114                 'id': '614781f3ade60b3a0bfff42a',
 115                 'ext': 'mp4',
 116                 'title': '#chingaribappa ',
 117                 'description': 'md5:d1df21d84088770468fa63afe3b17857',
 118                 'duration': 7,
 119                 'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
 120                 'like_count': int,
 121                 'view_count': int,
 122                 'comment_count': int,
 123                 'repost_count': int,
 124                 'timestamp': 1632076275,
 125                 'upload_date': '20210919',
 126                 'uploader_id': '5efc4b12cca35c3d1794c2d3',
 127                 'uploader': 'dada (girish) dhawale',
 128                 'uploader_url': 'https://chingari.io/dada1023',
 129                 'track': None,
 130                 'artist': None
 131             },
 132             'params': {'skip_download': True}
 133         }, {
 134             'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
 135             'info_dict': {
 136                 'id': '6146b132bcbf860959e12cba',
 137                 'ext': 'mp4',
 138                 'title': 'Tactor harvesting',
 139                 'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
 140                 'duration': 59.3,
 141                 'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
 142                 'like_count': int,
 143                 'view_count': int,
 144                 'comment_count': int,
 145                 'repost_count': int,
 146                 'timestamp': 1632022834,
 147                 'upload_date': '20210919',
 148                 'uploader_id': '5efc4b12cca35c3d1794c2d3',
 149                 'uploader': 'dada (girish) dhawale',
 150                 'uploader_url': 'https://chingari.io/dada1023',
 151                 'track': None,
 152                 'artist': None
 153             },
 154             'params': {'skip_download': True}
 155         }, {
 156             'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
 157             'info_dict': {
 158                 'id': '6145651b74cb030a64c40b82',
 159                 'ext': 'mp4',
 160                 'title': '#odiabhajan ',
 161                 'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
 162                 'duration': 56.67,
 163                 'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
 164                 'like_count': int,
 165                 'view_count': int,
 166                 'comment_count': int,
 167                 'repost_count': int,
 168                 'timestamp': 1631937819,
 169                 'upload_date': '20210918',
 170                 'uploader_id': '5efc4b12cca35c3d1794c2d3',
 171                 'uploader': 'dada (girish) dhawale',
 172                 'uploader_url': 'https://chingari.io/dada1023',
 173                 'track': None,
 174                 'artist': None
 175             },
 176             'params': {'skip_download': True}
 177         }],
 178     }, {
 179         'url': 'https://chingari.io/iskcon%2Cinc',
 180         'playlist_mincount': 1025,
 181         'info_dict': {
 182             'id': 'iskcon%2Cinc',
 183         },
 184     }]
 185
 186     def _entries(self, id):
 187         skip = 0
 188         has_more = True
 189         for page in itertools.count():
 190             posts = self._download_json('https://api.chingari.io/users/getPosts', id,
 191                                         data=json.dumps({'userId': id, 'ownerId': id, 'skip': skip, 'limit': 20}).encode(),
 192                                         headers={'content-type': 'application/json;charset=UTF-8'},
 193                                         note='Downloading page %s' % page)
 194             for post in posts.get('data', []):
 195                 post_data = post['post']
 196                 yield self._get_post(post_data['_id'], post_data)
 197             skip += 20
 198             has_more = posts['hasMoreData']
 199             if not has_more:
 200                 break
 201
 202     def _real_extract(self, url):
 203         alt_id = self._match_id(url)
 204         post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
 205         if post_json['code'] != 200:
 206             raise ExtractorError(post_json['message'], expected=True)
 207         id = post_json['data']['_id']
 208         return self.playlist_result(self._entries(id), playlist_id=alt_id)