yt_dlp/extractor/trovo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5 import json
   6
   7 from .common import InfoExtractor
   8 from ..utils import (
   9     ExtractorError,
  10     int_or_none,
  11     str_or_none,
  12     try_get,
  13 )
  14
  15
  16 class TrovoBaseIE(InfoExtractor):
  17     _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
  18     _HEADERS = {'Origin': 'https://trovo.live'}
  19
  20     def _call_api(self, video_id, query=None, data=None):
  21         return self._download_json(
  22             'https://gql.trovo.live/', video_id, query=query, data=data,
  23             headers={'Accept': 'application/json'})
  24
  25     def _extract_streamer_info(self, data):
  26         streamer_info = data.get('streamerInfo') or {}
  27         username = streamer_info.get('userName')
  28         return {
  29             'uploader': streamer_info.get('nickName'),
  30             'uploader_id': str_or_none(streamer_info.get('uid')),
  31             'uploader_url': 'https://trovo.live/' + username if username else None,
  32         }
  33
  34
  35 class TrovoIE(TrovoBaseIE):
  36     _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
  37
  38     def _real_extract(self, url):
  39         username = self._match_id(url)
  40         live_info = self._call_api(username, query={
  41             'query': '''{
  42   getLiveInfo(params: {userName: "%s"}) {
  43     isLive
  44     programInfo {
  45       coverUrl
  46       id
  47       streamInfo {
  48         desc
  49         playUrl
  50       }
  51       title
  52     }
  53     streamerInfo {
  54         nickName
  55         uid
  56         userName
  57     }
  58   }
  59 }''' % username,
  60         })['data']['getLiveInfo']
  61         if live_info.get('isLive') == 0:
  62             raise ExtractorError('%s is offline' % username, expected=True)
  63         program_info = live_info['programInfo']
  64         program_id = program_info['id']
  65         title = program_info['title']
  66
  67         formats = []
  68         for stream_info in (program_info.get('streamInfo') or []):
  69             play_url = stream_info.get('playUrl')
  70             if not play_url:
  71                 continue
  72             format_id = stream_info.get('desc')
  73             formats.append({
  74                 'format_id': format_id,
  75                 'height': int_or_none(format_id[:-1]) if format_id else None,
  76                 'url': play_url,
  77                 'http_headers': self._HEADERS,
  78             })
  79         self._sort_formats(formats)
  80
  81         info = {
  82             'id': program_id,
  83             'title': title,
  84             'formats': formats,
  85             'thumbnail': program_info.get('coverUrl'),
  86             'is_live': True,
  87         }
  88         info.update(self._extract_streamer_info(live_info))
  89         return info
  90
  91
  92 class TrovoVodIE(TrovoBaseIE):
  93     _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
  94     _TESTS = [{
  95         'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
  96         'info_dict': {
  97             'id': 'ltv-100095501_100095501_1609596043',
  98             'ext': 'mp4',
  99             'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
 100             'uploader': 'Exsl',
 101             'timestamp': 1609640305,
 102             'upload_date': '20210103',
 103             'uploader_id': '100095501',
 104             'duration': 43977,
 105             'view_count': int,
 106             'like_count': int,
 107             'comment_count': int,
 108             'comments': 'mincount:8',
 109             'categories': ['Grand Theft Auto V'],
 110         },
 111         'skip': '404'
 112     }, {
 113         'url': 'https://trovo.live/clip/lc-5285890810184026005',
 114         'only_matching': True,
 115     }]
 116
 117     def _real_extract(self, url):
 118         vid = self._match_id(url)
 119         resp = self._call_api(vid, data=json.dumps([{
 120             'query': '''{
 121   batchGetVodDetailInfo(params: {vids: ["%s"]}) {
 122     VodDetailInfos
 123   }
 124 }''' % vid,
 125         }, {
 126             'query': '''{
 127   getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
 128     commentList {
 129       author {
 130         nickName
 131         uid
 132       }
 133       commentID
 134       content
 135       createdAt
 136       parentID
 137     }
 138   }
 139 }''' % vid,
 140         }]).encode())
 141         vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
 142         vod_info = vod_detail_info['vodInfo']
 143         title = vod_info['title']
 144
 145         language = vod_info.get('languageName')
 146         formats = []
 147         for play_info in (vod_info.get('playInfos') or []):
 148             play_url = play_info.get('playUrl')
 149             if not play_url:
 150                 continue
 151             format_id = play_info.get('desc')
 152             formats.append({
 153                 'ext': 'mp4',
 154                 'filesize': int_or_none(play_info.get('fileSize')),
 155                 'format_id': format_id,
 156                 'height': int_or_none(format_id[:-1]) if format_id else None,
 157                 'language': language,
 158                 'protocol': 'm3u8_native',
 159                 'tbr': int_or_none(play_info.get('bitrate')),
 160                 'url': play_url,
 161                 'http_headers': self._HEADERS,
 162             })
 163         self._sort_formats(formats)
 164
 165         category = vod_info.get('categoryName')
 166         get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
 167
 168         comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
 169         comments = []
 170         for comment in comment_list:
 171             content = comment.get('content')
 172             if not content:
 173                 continue
 174             author = comment.get('author') or {}
 175             parent = comment.get('parentID')
 176             comments.append({
 177                 'author': author.get('nickName'),
 178                 'author_id': str_or_none(author.get('uid')),
 179                 'id': str_or_none(comment.get('commentID')),
 180                 'text': content,
 181                 'timestamp': int_or_none(comment.get('createdAt')),
 182                 'parent': 'root' if parent == 0 else str_or_none(parent),
 183             })
 184
 185         info = {
 186             'id': vid,
 187             'title': title,
 188             'formats': formats,
 189             'thumbnail': vod_info.get('coverUrl'),
 190             'timestamp': int_or_none(vod_info.get('publishTs')),
 191             'duration': int_or_none(vod_info.get('duration')),
 192             'view_count': get_count('watch'),
 193             'like_count': get_count('like'),
 194             'comment_count': get_count('comment'),
 195             'comments': comments,
 196             'categories': [category] if category else None,
 197         }
 198         info.update(self._extract_streamer_info(vod_detail_info))
 199         return info
 200
 201
 202 class TrovoChannelBaseIE(TrovoBaseIE):
 203     def _get_vod_json(self, page, uid):
 204         raise NotImplementedError('This method must be implemented by subclasses')
 205
 206     def _entries(self, uid):
 207         for page in itertools.count(1):
 208             vod_json = self._get_vod_json(page, uid)
 209             vods = vod_json.get('vodInfos', [])
 210             for vod in vods:
 211                 yield self.url_result(
 212                     'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')),
 213                     ie=TrovoVodIE.ie_key())
 214             has_more = vod_json['hasMore']
 215             if not has_more:
 216                 break
 217
 218     def _real_extract(self, url):
 219         id = self._match_id(url)
 220         uid = str(self._call_api(id, query={
 221             'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id
 222         })['data']['getLiveInfo']['streamerInfo']['uid'])
 223         return self.playlist_result(self._entries(uid), playlist_id=uid)
 224
 225
 226 class TrovoChannelVodIE(TrovoChannelBaseIE):
 227     _VALID_URL = r'trovovod:(?P<id>[^\s]+)'
 228     IE_DESC = 'All VODs of a trovo.live channel; "trovovod:" prefix'
 229
 230     _TESTS = [{
 231         'url': 'trovovod:OneTappedYou',
 232         'playlist_mincount': 24,
 233         'info_dict': {
 234             'id': '100719456',
 235         },
 236     }]
 237
 238     _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}'
 239     _TYPE = 'video'
 240
 241     def _get_vod_json(self, page, uid):
 242         return self._call_api(uid, query={
 243             'query': self._QUERY % (page, uid)
 244         })['data']['getChannelLtvVideoInfos']
 245
 246
 247 class TrovoChannelClipIE(TrovoChannelBaseIE):
 248     _VALID_URL = r'trovoclip:(?P<id>[^\s]+)'
 249     IE_DESC = 'All Clips of a trovo.live channel; "trovoclip:" prefix'
 250
 251     _TESTS = [{
 252         'url': 'trovoclip:OneTappedYou',
 253         'playlist_mincount': 29,
 254         'info_dict': {
 255             'id': '100719456',
 256         },
 257     }]
 258
 259     _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}'
 260     _TYPE = 'clip'
 261
 262     def _get_vod_json(self, page, uid):
 263         return self._call_api(uid, query={
 264             'query': self._QUERY % (page, uid)
 265         })['data']['getChannelClipVideoInfos']