]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/beampro.py
[udemy] Extract asset captions
[yt-dlp.git] / youtube_dl / extractor / beampro.py
CommitLineData
cd55c6cc 1# coding: utf-8
2from __future__ import unicode_literals
3
4from .common import InfoExtractor
5from ..utils import (
6 ExtractorError,
7 clean_html,
8 compat_str,
1e0d65f0 9 float_or_none,
cd55c6cc 10 int_or_none,
11 parse_iso8601,
12 try_get,
1e0d65f0 13 urljoin,
cd55c6cc 14)
15
16
class BeamProBaseIE(InfoExtractor):
    """Common constants and helpers for the beam.pro / mixer.com extractors."""

    # Root URL that the subclasses prepend to every API endpoint they request.
    _API_BASE = 'https://mixer.com/api/v1'
    # Translation table from the channel's ``audience`` value to an age limit.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        """Derive the uploader-related info fields from a channel dict.

        ``chan`` is a channel object as decoded from the API JSON.  The
        returned dict always has the keys ``uploader``, ``uploader_id`` and
        ``age_limit``; each value is None when the corresponding data is
        absent from ``chan``.
        """
        # Prefer the channel token; fall back to the nested user name.
        uploader = chan.get('token')
        if not uploader:
            uploader = try_get(
                chan, lambda x: x['user']['username'], compat_str)

        # Same idea for the numeric id: top-level field first, then nested.
        owner_id = chan.get('userId')
        if not owner_id:
            owner_id = try_get(chan, lambda x: x['user']['id'])

        uploader_id = None
        if owner_id:
            uploader_id = compat_str(owner_id)

        # An unknown or missing audience yields None.
        age_limit = self._RATINGS.get(chan.get('audience'))

        return {
            'uploader': uploader,
            'uploader_id': uploader_id,
            'age_limit': age_limit,
        }
29
30
class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live channel streams on beam.pro / mixer.com."""

    IE_NAME = 'Mixer:live'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://mixer.com/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    # Two placeholders remain after formatting: channel id and manifest kind.
    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the VOD extractor claims; otherwise defer to the base check."""
        # The live pattern also matches VOD URLs, so VOD gets first refusal.
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the info dict for the live stream of the channel named in ``url``.

        Raises ExtractorError (marked as expected) when the API explicitly
        reports the channel as offline.
        """
        channel_name = self._match_id(url)

        channel_api_url = '%s/channels/%s' % (self._API_BASE, channel_name)
        chan = self._download_json(channel_api_url, channel_name)

        # Only an explicit ``False`` counts as offline; a missing flag does not.
        if chan.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)

        channel_id = chan['id']

        hls_manifest = self._MANIFEST_URL_TEMPLATE % (channel_id, 'm3u8')
        formats = self._extract_m3u8_formats(
            hls_manifest, channel_name, ext='mp4', m3u8_id='hls',
            fatal=False)

        smil_manifest = self._MANIFEST_URL_TEMPLATE % (channel_id, 'smil')
        smil_formats = self._extract_smil_formats(
            smil_manifest, channel_name, fatal=False)
        formats.extend(smil_formats)

        self._sort_formats(formats)

        video_id = compat_str(chan.get('id') or channel_name)
        title = self._live_title(chan.get('name') or channel_name)
        thumbnail = try_get(
            chan, lambda x: x['thumbnail']['url'], compat_str)

        info = {
            'id': video_id,
            'title': title,
            'description': clean_html(chan.get('description')),
            'thumbnail': thumbnail,
            'timestamp': parse_iso8601(chan.get('updatedAt')),
            'is_live': True,
            'view_count': int_or_none(chan.get('viewersTotal')),
            'formats': formats,
        }
        # Add uploader, uploader_id and age_limit from the shared helper.
        info.update(self._extract_channel_info(chan))

        return info
98
99
class BeamProVodIE(BeamProBaseIE):
    """Extractor for recorded streams (VODs) on beam.pro / mixer.com."""

    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
    _TEST = {
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }

    # Playable VOD kinds: format name -> (file name under baseUrl, protocol).
    _VOD_SOURCES = {
        'hls': ('manifest.m3u8', 'm3u8_native'),
        'raw': ('source.mp4', 'https'),
    }

    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format entry for a single VOD item.

        ``vod`` is one element of the recording's ``vods`` list and
        ``vod_type`` its format name (``'hls'`` or ``'raw'``).

        Returns a list with one format dict, or an empty list when the item
        has no ``baseUrl``.  Raises ValueError for an unsupported
        ``vod_type``.
        """
        if not vod.get('baseUrl'):
            return []

        # Raise explicitly rather than assert: assertions are stripped under
        # ``python -O``, which would leave filename/protocol unbound.
        source = BeamProVodIE._VOD_SOURCES.get(vod_type)
        if source is None:
            raise ValueError('Unsupported VOD format: %s' % vod_type)
        filename, protocol = source

        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        # The height is appended to the format id only when it is a string.
        # NOTE(review): a numeric Height is ignored here - confirm the API type.
        format_id = [vod_type]
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])

        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        """Return the info dict for the recording whose id is given in ``url``.

        Raises ExtractorError (marked as expected) when the recording's state
        is anything other than ``'AVAILABLE'``.
        """
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None

        # Tolerate a missing or null 'vods' list and malformed entries
        # instead of failing with a bare KeyError/AttributeError.
        for vod in vod_info.get('vods') or []:
            if not isinstance(vod, dict):
                continue
            vod_type = vod.get('format')
            if vod_type in self._VOD_SOURCES:
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        # Presumably this is where an empty format list gets reported; verify
        # against InfoExtractor._sort_formats.
        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        # Add uploader, uploader_id and age_limit from the embedded channel.
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info