yt_dlp/extractor/trovo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5 import json
   6
   7 from .common import InfoExtractor
   8 from ..utils import (
   9     ExtractorError,
  10     int_or_none,
  11     str_or_none,
  12     try_get,
  13 )
  14
  15
  16 class TrovoBaseIE(InfoExtractor):
  17     _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
  18     _HEADERS = {'Origin': 'https://trovo.live'}
  19
  20     def _extract_streamer_info(self, data):
  21         streamer_info = data.get('streamerInfo') or {}
  22         username = streamer_info.get('userName')
  23         return {
  24             'uploader': streamer_info.get('nickName'),
  25             'uploader_id': str_or_none(streamer_info.get('uid')),
  26             'uploader_url': 'https://trovo.live/' + username if username else None,
  27         }
  28
  29
  30 class TrovoIE(TrovoBaseIE):
  31     _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
  32
  33     def _real_extract(self, url):
  34         username = self._match_id(url)
  35         live_info = self._download_json(
  36             'https://gql.trovo.live/', username, query={
  37                 'query': '''{
  38   getLiveInfo(params: {userName: "%s"}) {
  39     isLive
  40     programInfo {
  41       coverUrl
  42       id
  43       streamInfo {
  44         desc
  45         playUrl
  46       }
  47       title
  48     }
  49     streamerInfo {
  50         nickName
  51         uid
  52         userName
  53     }
  54   }
  55 }''' % username,
  56             })['data']['getLiveInfo']
  57         if live_info.get('isLive') == 0:
  58             raise ExtractorError('%s is offline' % username, expected=True)
  59         program_info = live_info['programInfo']
  60         program_id = program_info['id']
  61         title = self._live_title(program_info['title'])
  62
  63         formats = []
  64         for stream_info in (program_info.get('streamInfo') or []):
  65             play_url = stream_info.get('playUrl')
  66             if not play_url:
  67                 continue
  68             format_id = stream_info.get('desc')
  69             formats.append({
  70                 'format_id': format_id,
  71                 'height': int_or_none(format_id[:-1]) if format_id else None,
  72                 'url': play_url,
  73                 'http_headers': self._HEADERS,
  74             })
  75         self._sort_formats(formats)
  76
  77         info = {
  78             'id': program_id,
  79             'title': title,
  80             'formats': formats,
  81             'thumbnail': program_info.get('coverUrl'),
  82             'is_live': True,
  83         }
  84         info.update(self._extract_streamer_info(live_info))
  85         return info
  86
  87
  88 class TrovoVodIE(TrovoBaseIE):
  89     _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
  90     _TESTS = [{
  91         'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
  92         'info_dict': {
  93             'id': 'ltv-100095501_100095501_1609596043',
  94             'ext': 'mp4',
  95             'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
  96             'uploader': 'Exsl',
  97             'timestamp': 1609640305,
  98             'upload_date': '20210103',
  99             'uploader_id': '100095501',
 100             'duration': 43977,
 101             'view_count': int,
 102             'like_count': int,
 103             'comment_count': int,
 104             'comments': 'mincount:8',
 105             'categories': ['Grand Theft Auto V'],
 106         },
 107     }, {
 108         'url': 'https://trovo.live/clip/lc-5285890810184026005',
 109         'only_matching': True,
 110     }]
 111
 112     def _real_extract(self, url):
 113         vid = self._match_id(url)
 114         resp = self._download_json(
 115             'https://gql.trovo.live/', vid, data=json.dumps([{
 116                 'query': '''{
 117   batchGetVodDetailInfo(params: {vids: ["%s"]}) {
 118     VodDetailInfos
 119   }
 120 }''' % vid,
 121             }, {
 122                 'query': '''{
 123   getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
 124     commentList {
 125       author {
 126         nickName
 127         uid
 128       }
 129       commentID
 130       content
 131       createdAt
 132       parentID
 133     }
 134   }
 135 }''' % vid,
 136             }]).encode(), headers={
 137                 'Content-Type': 'application/json',
 138             })
 139         vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
 140         vod_info = vod_detail_info['vodInfo']
 141         title = vod_info['title']
 142
 143         language = vod_info.get('languageName')
 144         formats = []
 145         for play_info in (vod_info.get('playInfos') or []):
 146             play_url = play_info.get('playUrl')
 147             if not play_url:
 148                 continue
 149             format_id = play_info.get('desc')
 150             formats.append({
 151                 'ext': 'mp4',
 152                 'filesize': int_or_none(play_info.get('fileSize')),
 153                 'format_id': format_id,
 154                 'height': int_or_none(format_id[:-1]) if format_id else None,
 155                 'language': language,
 156                 'protocol': 'm3u8_native',
 157                 'tbr': int_or_none(play_info.get('bitrate')),
 158                 'url': play_url,
 159                 'http_headers': self._HEADERS,
 160             })
 161         self._sort_formats(formats)
 162
 163         category = vod_info.get('categoryName')
 164         get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
 165
 166         comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
 167         comments = []
 168         for comment in comment_list:
 169             content = comment.get('content')
 170             if not content:
 171                 continue
 172             author = comment.get('author') or {}
 173             parent = comment.get('parentID')
 174             comments.append({
 175                 'author': author.get('nickName'),
 176                 'author_id': str_or_none(author.get('uid')),
 177                 'id': str_or_none(comment.get('commentID')),
 178                 'text': content,
 179                 'timestamp': int_or_none(comment.get('createdAt')),
 180                 'parent': 'root' if parent == 0 else str_or_none(parent),
 181             })
 182
 183         info = {
 184             'id': vid,
 185             'title': title,
 186             'formats': formats,
 187             'thumbnail': vod_info.get('coverUrl'),
 188             'timestamp': int_or_none(vod_info.get('publishTs')),
 189             'duration': int_or_none(vod_info.get('duration')),
 190             'view_count': get_count('watch'),
 191             'like_count': get_count('like'),
 192             'comment_count': get_count('comment'),
 193             'comments': comments,
 194             'categories': [category] if category else None,
 195         }
 196         info.update(self._extract_streamer_info(vod_detail_info))
 197         return info
 198
 199
 200 class TrovoChannelBaseIE(InfoExtractor):
 201     def _get_vod_json(self, page, uid):
 202         raise NotImplementedError('This method must be implemented by subclasses')
 203
 204     def _entries(self, uid):
 205         for page in itertools.count(1):
 206             vod_json = self._get_vod_json(page, uid)
 207             vods = vod_json.get('vodInfos', [])
 208             for vod in vods:
 209                 yield self.url_result(
 210                     'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')),
 211                     ie=TrovoVodIE.ie_key())
 212             has_more = vod_json['hasMore']
 213             if not has_more:
 214                 break
 215
 216     def _real_extract(self, url):
 217         id = self._match_id(url)
 218         uid = str(self._download_json('https://gql.trovo.live/', id, query={
 219             'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id
 220         })['data']['getLiveInfo']['streamerInfo']['uid'])
 221         return self.playlist_result(self._entries(uid), playlist_id=uid)
 222
 223
 224 class TrovoChannelVodIE(TrovoChannelBaseIE):
 225     _VALID_URL = r'trovovod:(?P<id>[^\s]+)'
 226     IE_DESC = 'All VODs of a trovo.live channel; "trovovod:" prefix'
 227
 228     _TESTS = [{
 229         'url': 'trovovod:OneTappedYou',
 230         'playlist_mincount': 24,
 231         'info_dict': {
 232             'id': '100719456',
 233         },
 234     }]
 235
 236     _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}'
 237     _TYPE = 'video'
 238
 239     def _get_vod_json(self, page, uid):
 240         return self._download_json('https://gql.trovo.live/', uid, query={
 241             'query': self._QUERY % (page, uid)
 242         })['data']['getChannelLtvVideoInfos']
 243
 244
 245 class TrovoChannelClipIE(TrovoChannelBaseIE):
 246     _VALID_URL = r'trovoclip:(?P<id>[^\s]+)'
 247     IE_DESC = 'All Clips of a trovo.live channel; "trovoclip:" prefix'
 248
 249     _TESTS = [{
 250         'url': 'trovoclip:OneTappedYou',
 251         'playlist_mincount': 29,
 252         'info_dict': {
 253             'id': '100719456',
 254         },
 255     }]
 256
 257     _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}'
 258     _TYPE = 'clip'
 259
 260     def _get_vod_json(self, page, uid):
 261         return self._download_json('https://gql.trovo.live/', uid, query={
 262             'query': self._QUERY % (page, uid)
 263         })['data']['getChannelClipVideoInfos']