]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/trovo.py
[YoutubeDL] Write verbose header to logger
[yt-dlp.git] / yt_dlp / extractor / trovo.py
CommitLineData
a820dc72
RA
1# coding: utf-8
2from __future__ import unicode_literals
3
4import json
5
6from .common import InfoExtractor
7from ..utils import (
8 ExtractorError,
9 int_or_none,
10 str_or_none,
11 try_get,
12)
13
14
class TrovoBaseIE(InfoExtractor):
    """Common constants and helpers shared by the Trovo extractors."""

    # URL prefix that the concrete extractors extend with their own path pattern.
    _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
    # Headers attached to every format entry produced by the subclasses.
    _HEADERS = {'Origin': 'https://trovo.live'}

    def _extract_streamer_info(self, data):
        """Build the uploader fields from ``data['streamerInfo']``.

        A missing or empty ``streamerInfo`` entry is treated as an empty
        mapping, so every returned value may be ``None``.

        Returns a dict with the keys ``uploader``, ``uploader_id`` and
        ``uploader_url``.
        """
        streamer = data.get('streamerInfo') or {}
        user_name = streamer.get('userName')

        # The channel URL can only be derived when a user name is present.
        channel_url = None
        if user_name:
            channel_url = 'https://trovo.live/' + user_name

        return {
            'uploader': streamer.get('nickName'),
            'uploader_id': str_or_none(streamer.get('uid')),
            'uploader_url': channel_url,
        }
27
28
class TrovoIE(TrovoBaseIE):
    """Extractor for Trovo channel URLs (anything that is not a clip/video page)."""

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'

    def _real_extract(self, url):
        """Query the Trovo GraphQL endpoint for the channel's live stream.

        The user name is taken from the URL and interpolated into a
        ``getLiveInfo`` query. Raises ``ExtractorError`` (marked as expected)
        when the response reports ``isLive == 0``.

        Returns an info dict flagged ``is_live`` containing the formats and
        the uploader fields.
        """
        username = self._match_id(url)

        live_query = '''{
getLiveInfo(params: {userName: "%s"}) {
isLive
programInfo {
coverUrl
id
streamInfo {
desc
playUrl
}
title
}
streamerInfo {
nickName
uid
userName
}
}
}''' % username
        response = self._download_json(
            'https://gql.trovo.live/', username, query={'query': live_query})
        live_info = response['data']['getLiveInfo']

        if live_info.get('isLive') == 0:
            raise ExtractorError('%s is offline' % username, expected=True)

        program_info = live_info['programInfo']
        program_id = program_info['id']
        title = self._live_title(program_info['title'])

        formats = []
        for stream in (program_info.get('streamInfo') or []):
            stream_url = stream.get('playUrl')
            if stream_url:
                quality = stream.get('desc')
                # The height is parsed from the description minus its last
                # character; int_or_none yields None when that is not numeric.
                height = int_or_none(quality[:-1]) if quality else None
                formats.append({
                    'format_id': quality,
                    'height': height,
                    'url': stream_url,
                    'http_headers': self._HEADERS,
                })
        self._sort_formats(formats)

        info = {
            'id': program_id,
            'title': title,
            'formats': formats,
            'thumbnail': program_info.get('coverUrl'),
            'is_live': True,
        }
        info.update(self._extract_streamer_info(live_info))
        return info
85
86
class TrovoVodIE(TrovoBaseIE):
    """Extractor for Trovo ``/clip/<id>`` and ``/video/<id>`` URLs."""

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
        'info_dict': {
            'id': 'ltv-100095501_100095501_1609596043',
            'ext': 'mp4',
            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
            'uploader': 'Exsl',
            'timestamp': 1609640305,
            'upload_date': '20210103',
            'uploader_id': '100095501',
            'duration': 43977,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'comments': 'mincount:8',
            'categories': ['Grand Theft Auto V'],
        },
    }, {
        'url': 'https://trovo.live/clip/lc-5285890810184026005',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch VOD details and comments with one batched GraphQL POST.

        Two queries are sent as a JSON array: the first response entry
        carries the VOD details (required), the second the comment list
        (optional; an absent or malformed list yields no comments).

        Returns an info dict with formats, counters, comments and the
        uploader fields.
        """
        vid = self._match_id(url)

        detail_query = '''{
batchGetVodDetailInfo(params: {vids: ["%s"]}) {
VodDetailInfos
}
}''' % vid
        comment_query = '''{
getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
commentList {
author {
nickName
uid
}
commentID
content
createdAt
parentID
}
}
}''' % vid
        payload = json.dumps([
            {'query': detail_query},
            {'query': comment_query},
        ]).encode()
        resp = self._download_json(
            'https://gql.trovo.live/', vid, data=payload,
            headers={'Content-Type': 'application/json'})

        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
        vod_info = vod_detail_info['vodInfo']
        title = vod_info['title']

        language = vod_info.get('languageName')
        formats = []
        for variant in (vod_info.get('playInfos') or []):
            variant_url = variant.get('playUrl')
            if variant_url:
                quality = variant.get('desc')
                # The height is parsed from the description minus its last
                # character; int_or_none yields None when that is not numeric.
                height = int_or_none(quality[:-1]) if quality else None
                formats.append({
                    'ext': 'mp4',
                    'filesize': int_or_none(variant.get('fileSize')),
                    'format_id': quality,
                    'height': height,
                    'language': language,
                    'protocol': 'm3u8_native',
                    'tbr': int_or_none(variant.get('bitrate')),
                    'url': variant_url,
                    'http_headers': self._HEADERS,
                })
        self._sort_formats(formats)

        category = vod_info.get('categoryName')

        def count_of(kind):
            # Counters are stored under '<kind>Num' keys of the VOD info.
            return int_or_none(vod_info.get(kind + 'Num'))

        raw_comments = try_get(
            resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
        comments = []
        for entry in raw_comments:
            text = entry.get('content')
            if text:
                author = entry.get('author') or {}
                parent_id = entry.get('parentID')
                # A parentID of 0 is mapped to the 'root' marker.
                if parent_id == 0:
                    parent = 'root'
                else:
                    parent = str_or_none(parent_id)
                comments.append({
                    'author': author.get('nickName'),
                    'author_id': str_or_none(author.get('uid')),
                    'id': str_or_none(entry.get('commentID')),
                    'text': text,
                    'timestamp': int_or_none(entry.get('createdAt')),
                    'parent': parent,
                })

        info = {
            'id': vid,
            'title': title,
            'formats': formats,
            'thumbnail': vod_info.get('coverUrl'),
            'timestamp': int_or_none(vod_info.get('publishTs')),
            'duration': int_or_none(vod_info.get('duration')),
            'view_count': count_of('watch'),
            'like_count': count_of('like'),
            'comment_count': count_of('comment'),
            'comments': comments,
            'categories': [category] if category else None,
        }
        info.update(self._extract_streamer_info(vod_detail_info))
        return info