jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/hitbox.py
[hitbox:live] Extract formats before metadata
[yt-dlp.git] / youtube_dl / extractor / hitbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 clean_html,
9 parse_iso8601,
10 float_or_none,
11 int_or_none,
12 compat_str,
13 determine_ext,
14 )
15
16
class HitboxIE(InfoExtractor):
    """Extractor for hitbox.tv recorded videos (``/video/<numeric id>`` URLs).

    ``_real_extract`` first builds the format list from the player config
    endpoint and then merges it into the metadata dictionary produced by
    ``_extract_metadata``.
    """

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Build the info dictionary (without formats) for a media item.

        :param url: base URL of the media API endpoint; ``video_id`` is
            appended to it as a path component.
        :param video_id: identifier of the video or channel.
        :returns: dict with ``id``, ``title``, ``alt_title``,
            ``description``, ``ext``, ``thumbnails``, ``duration``,
            ``uploader``, ``view_count``, ``timestamp`` and ``categories``.
        :raises IndexError: if the response carries no entry for the
            selected media type.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id)

        # Livestream is the default; recorded videos use a different list
        # key and a different date field.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Optional field: report "unknown" as None rather than [None].
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Thumbnails are optional; only emit the ones the API provided so
        # a missing path cannot break extraction with a TypeError.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Extract formats and metadata for a recorded video URL.

        Formats are collected from the player config before the metadata
        request is made; the returned dict is the metadata dict with a
        ``formats`` key added.
        """
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id)

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            # The 'Auto' entry is skipped; only concrete renditions are kept.
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            bitrate = int_or_none(video.get('bitrate'))
            if determine_ext(video_url) == 'm3u8':
                # Only absolute http(s) playlists are usable here.
                if not video_url.startswith('http'):
                    continue
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'tbr': bitrate,
                    'format_note': label,
                    'protocol': 'm3u8_native',
                })
            else:
                formats.append({
                    'url': video_url,
                    'tbr': bitrate,
                    'format_note': label,
                })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)
        metadata['formats'] = formats

        return metadata
128
129
class HitboxLiveIE(HitboxIE):
    """Extractor for hitbox.tv live channels (any non-``/video`` path).

    Formats are read from the live player config; metadata comes from the
    inherited ``_extract_metadata`` pointed at the live media endpoint.
    """

    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract formats and metadata for a live channel URL.

        Every distinct ``netConnectionUrl`` among the config's ``cdns`` is
        processed once. HLS streams are used as-is when they are absolute
        http(s) URLs; all other streams are joined onto the CDN base URL.
        The returned metadata dict is marked ``is_live`` and its title is
        passed through ``self._live_title``.
        """
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        seen_servers = set()
        # A missing 'cdns' key yields no formats instead of a TypeError.
        for cdn in player_config.get('cdns') or []:
            base_url = cdn.get('netConnectionUrl')
            # Each server is handled only once.
            if base_url in seen_servers:
                continue
            seen_servers.add(base_url)

            # The host is only a cosmetic format note, so an unexpected or
            # missing base URL must not abort extraction.
            host = None
            if base_url:
                host_match = re.search(
                    r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
                if host_match:
                    host = host_match.group(1)

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # Only absolute http(s) playlists are usable here.
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                elif base_url:
                    # Non-HLS stream names are relative to the CDN base
                    # URL, so they are only usable when one is present.
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))

        return metadata