]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/hitbox.py
[extractor/youtube] Parse translated subtitles only when requested
[yt-dlp.git] / yt_dlp / extractor / hitbox.py
1 import re
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import (
6 clean_html,
7 determine_ext,
8 float_or_none,
9 int_or_none,
10 parse_iso8601,
11 )
12
13
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos on hitbox.tv / smashcast.tv.

    Matches URLs whose path contains ``video/<id>`` or ``videos/<id>`` and
    builds the info dict from two smashcast.tv JSON endpoints: the player
    config (formats) and the media API (metadata).
    """

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
        'only_matching': True,
    }]

    def _extract_metadata(self, url, video_id):
        """Download ``<url>/<video_id>`` and map the JSON to info-dict fields.

        The response's ``media_type`` selects which list holds the entry
        (``video`` or ``livestream``) and which key carries the date
        (``media_date_added`` or ``media_live_since``). Only the first entry
        of that list is used.

        Optional fields (thumbnails, categories) are left out when the API
        does not provide them instead of raising.

        Returns a dict with id, title, alt_title, description, ext,
        thumbnails, duration, uploader, view_count, timestamp and categories.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')

        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        # A missing or empty list falls back to an empty entry so that every
        # lookup below yields None rather than failing with an IndexError.
        video_meta = (metadata.get(media_type) or [{}])[0]

        # Only emit a thumbnail when its path is present; concatenating the
        # base URL with None would raise a TypeError.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        # Avoid reporting a bogus [None] category list.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        return {
            'id': video_id,
            'title': video_meta.get('media_status'),
            'alt_title': video_meta.get('media_title'),
            'description': clean_html(
                video_meta.get('media_description')
                or video_meta.get('media_description_md')),
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': float_or_none(video_meta.get('media_duration')),
            'uploader': video_meta.get('media_user_name'),
            'view_count': int_or_none(video_meta.get('media_views')),
            # The second argument is the date/time delimiter passed to
            # parse_iso8601 (a space rather than the default).
            'timestamp': parse_iso8601(video_meta.get(date), ' '),
            'categories': categories,
        }

    def _real_extract(self, url):
        """Build the info dict for a recorded video.

        Formats come from the player config's ``clip.bitrates`` list; entries
        labelled ``Auto``, entries without a URL and non-HTTP m3u8 entries are
        skipped. Metadata is merged in from the media API.
        """
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            video_url = video.get('url')
            if label == 'Auto' or not video_url:
                continue
            fmt = {
                'url': video_url,
                'tbr': int_or_none(video.get('bitrate')),
                'format_note': label,
            }
            if determine_ext(video_url) == 'm3u8':
                # Relative/non-HTTP playlist URLs are not usable here.
                if not video_url.startswith('http'):
                    continue
                fmt.update({
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                })
            formats.append(fmt)
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/video', video_id)
        metadata['formats'] = formats

        return metadata
128
129
class HitboxLiveIE(HitboxIE):
    """Extractor for live channels on hitbox.tv / smashcast.tv.

    Matches ``/<channel>`` URLs that HitboxIE does not claim and reuses
    HitboxIE's metadata extraction against the live media API.
    """

    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/dimak',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs HitboxIE matches; otherwise defer to the base check."""
        if HitboxIE.suitable(url):
            return False
        return super(HitboxLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the info dict for a live channel.

        Iterates the player config's ``cdns`` list, skipping subscribe-only
        CDNs and CDNs whose ``netConnectionUrl`` was already processed. Each
        remaining stream becomes either an HLS format (absolute HTTP URL) or
        an RTMP format built from ``netConnectionUrl`` plus the stream path.
        Missing ``cdns`` / ``bitrates`` lists and an absent or unparsable
        ``netConnectionUrl`` are tolerated instead of raising.
        """
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        seen_servers = set()
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue
            base_url = cdn.get('netConnectionUrl')
            if base_url:
                # Each server is processed only once.
                if base_url in seen_servers:
                    continue
                seen_servers.add(base_url)
            # The last two dot-separated labels of the server host name are
            # used as the note for RTMP formats; None when they cannot be
            # extracted.
            host_match = re.search(
                r'.+\.([^\.]+\.[^\./]+)/.+', base_url or '')
            host = host_match.group(1) if host_match else None
            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                stream_url = stream.get('url')
                if label == 'Auto' or not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        # NOTE(review): kept from the original; presumably
                        # irrelevant for HLS downloads - confirm.
                        'rtmp_live': True,
                    })
                elif base_url:
                    # RTMP stream paths are relative to the CDN's connection
                    # URL, so they are unusable without it.
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/live', video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True

        return metadata