]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/hitbox.py
Fix "invalid escape sequences" error on Python 3.6
[yt-dlp.git] / youtube_dl / extractor / hitbox.py
CommitLineData
da3f7fb7 1# coding: utf-8
2from __future__ import unicode_literals
0c0a70f4 3
e3947e2b 4import re
da3f7fb7 5
6from .common import InfoExtractor
7from ..utils import (
0c0a70f4
S
8 clean_html,
9 parse_iso8601,
10 float_or_none,
11 int_or_none,
12 compat_str,
65939eff 13 determine_ext,
da3f7fb7 14)
15
16
class HitboxIE(InfoExtractor):
    """Extractor for hitbox.tv recorded videos (``/video/<numeric id>`` URLs)."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Download the media metadata JSON and map it to an info dict.

        The JSON is fetched from ``<url>/<video_id>``.  Depending on the
        reported ``media_type`` the entry is read from the ``video`` or the
        ``livestream`` list; only the first element of that list is used.

        Returns a dict with ``id``, ``title``, ``alt_title``,
        ``description``, ``ext``, ``thumbnails``, ``duration``, ``uploader``,
        ``view_count``, ``timestamp`` and ``categories``.  ``formats`` is NOT
        included; callers add it themselves.

        Raises IndexError when the selected media list is missing or empty.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id,
            'Downloading metadata JSON')

        # Live streams and recorded videos live under different keys and
        # carry their start/upload date in different fields.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Only report a category when the API actually provides one, so the
        # result never contains a ``[None]`` list.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Thumbnail paths are relative to ``thumb_base``.  A missing path is
        # skipped instead of crashing on ``str + None``.
        thumbs = []
        for thumb_key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(thumb_key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a recorded video.

        Formats come from the player config JSON (``clip.bitrates``); the
        remaining fields come from ``_extract_metadata``.
        """
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            # The 'Auto' entry is skipped; only concrete bitrates are listed.
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            bitrate = int_or_none(video.get('bitrate'))
            if determine_ext(video_url) == 'm3u8':
                # m3u8 entries are only usable when given as http(s) URLs.
                if not video_url.startswith('http'):
                    continue
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'tbr': bitrate,
                    'format_note': label,
                    'protocol': 'm3u8_native',
                })
            else:
                formats.append({
                    'url': video_url,
                    'tbr': bitrate,
                    'format_note': label,
                })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)
        metadata['formats'] = formats

        return metadata
129
130
class HitboxLiveIE(HitboxIE):
    """Extractor for hitbox.tv live channels (``hitbox.tv/<channel>`` URLs)."""

    IE_NAME = 'hitbox:live'
    # Exclude only the ``/video/<id>`` namespace handled by HitboxIE.  The
    # lookahead includes the trailing slash so that channels whose name merely
    # starts with "video" are still accepted.
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video/)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a live channel.

        Formats are collected from every CDN listed in the live player
        config; each distinct ``netConnectionUrl`` is processed once.  The
        remaining fields come from ``_extract_metadata``, and the result is
        flagged with ``is_live``.
        """
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        seen_servers = set()
        # A missing 'cdns' key yields no formats instead of a TypeError.
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue
            base_url = cdn.get('netConnectionUrl')
            if not base_url:
                # Nothing to derive a host or stream URL from.
                continue
            # Each server's streams are only added once.
            if base_url in seen_servers:
                continue
            seen_servers.add(base_url)

            # Last two dot-separated labels of the server name, used as the
            # format note for non-HLS streams; None when the URL has no such
            # host part.
            host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
            host = host_match.group(1) if host_match else None

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # HLS entries are only usable when given as http(s) URLs.
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                else:
                    # Other streams are addressed relative to the CDN's
                    # connection URL.
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))

        return metadata