]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/wimtv.py
263844d72b743de5744cde44e72a690da504f00a
[yt-dlp.git] / yt_dlp / extractor / wimtv.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 determine_ext,
6 parse_duration,
7 urlencode_postdata,
8 ExtractorError,
9 )
10
11
class WimTVIE(InfoExtractor):
    """Extractor for vod, live and cast streams served from platform.wim.tv."""

    # Values scraped from the site's player scripts (see _get_player_data).
    # Stays None until _real_initialize has populated it on the instance.
    _player = None
    _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
    # Verbose regex: whitespace is ignored and '#' must be escaped.
    # Accepts both the query-style embed URLs (?vod=<uuid>) and the
    # fragment-style webtv URLs (#/webtv/<channel>/vod/<uuid>).
    _VALID_URL = r'''(?x:
        https?://platform\.wim\.tv/
        (?:
            (?:embed/)?\?
            |\#/webtv/.+?/
        )
        (?P<type>vod|live|cast)[=/]
        (?P<id>%s).*?)''' % _UUID_RE
    _TESTS = [{
        # vod stream
        'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a',
        # NOTE(review): this value is the video UUID, not an MD5 digest;
        # presumably never checked because skip_download is set - confirm.
        'md5': 'db29fb32-bade-47b6-a3a6-cb69fe80267a',
        'info_dict': {
            'id': 'db29fb32-bade-47b6-a3a6-cb69fe80267a',
            'ext': 'mp4',
            'title': 'AMA SUPERCROSS 2020 - R2 ST. LOUIS',
            'duration': 6481,
            'thumbnail': r're:https?://.+?/thumbnail/.+?/720$'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'https://platform.wim.tv/embed/?live=28e22c22-49db-40f3-8c37-8cbb0ff44556&autostart=true',
        'info_dict': {
            'id': '28e22c22-49db-40f3-8c37-8cbb0ff44556',
            'ext': 'mp4',
            'title': 'Streaming MSmotorTV',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://platform.wim.tv/#/webtv/automotornews/vod/422492b6-539e-474d-9c6b-68c9d5893365',
        'only_matching': True,
    }, {
        'url': 'https://platform.wim.tv/#/webtv/renzoarborechannel/cast/f47e0d15-5b45-455e-bf0d-dba8ffa96365',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src URL of every iframe in *webpage* that matches _VALID_URL."""
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe[^>]+src=["\'](?P<url>%s)' % WimTVIE._VALID_URL,
                webpage)]

    def _real_initialize(self):
        """Fetch the player configuration once, before the first extraction."""
        if not self._player:
            self._get_player_data()

    def _get_player_data(self):
        """Download the player scripts and store the values they define.

        Fills self._player with 'app_auth', 'thumb_server' and
        'thumb_server_path'. Raises ExtractorError when an extracted value
        is empty.
        """
        msg_id = 'Player data'
        self._player = {}

        # Each entry names a script to download and the variables to pull
        # out of it; 'variable' is the key the value is stored under.
        datas = [{
            'url': 'https://platform.wim.tv/common/libs/player/wimtv/wim-rest.js',
            'vars': [{
                'regex': r'appAuth = "(.+?)"',
                'variable': 'app_auth',
            }]
        }, {
            'url': 'https://platform.wim.tv/common/config/endpointconfig.js',
            'vars': [{
                'regex': r'PRODUCTION_HOSTNAME_THUMB = "(.+?)"',
                'variable': 'thumb_server',
            }, {
                'regex': r'PRODUCTION_HOSTNAME_THUMB\s*\+\s*"(.+?)"',
                'variable': 'thumb_server_path',
            }]
        }]

        for data in datas:
            script = self._download_webpage(data['url'], msg_id)
            for var in data['vars']:
                val = self._search_regex(var['regex'], script, msg_id)
                if not val:
                    raise ExtractorError('%s not found' % var['variable'])
                self._player[var['variable']] = val

    def _generate_token(self):
        """Request an access token using the scraped application credentials.

        Sends a client_credentials grant to the token endpoint with
        self._player['app_auth'] as Basic authorization and returns the
        'access_token' of the response. Raises ExtractorError when the
        response carries no token.
        """
        response = self._download_json(
            'https://platform.wim.tv/wimtv-server/oauth/token', 'Token generation',
            headers={'Authorization': 'Basic %s' % self._player['app_auth']},
            data=urlencode_postdata({'grant_type': 'client_credentials'}))
        token = response.get('access_token')
        if not token:
            raise ExtractorError('access token not generated')
        return token

    def _generate_thumbnail(self, thumb_id, width='720'):
        """Build the thumbnail URL for *thumb_id*, or return None.

        None is returned when *thumb_id* is empty or no thumbnail server
        is known. *width* becomes the last path component of the URL.
        """
        thumb_server = self._player.get('thumb_server')
        if not thumb_id or not thumb_server:
            return None
        # A missing path prefix is treated as empty; the cached player
        # data is left untouched.
        return '%s%s/asset/thumbnail/%s/%s' % (
            thumb_server,
            self._player.get('thumb_server_path') or '',
            thumb_id, width)

    def _real_extract(self, url):
        """Resolve *url* to an info dict with formats, title and thumbnail."""
        urlc = self._match_valid_url(url).groupdict()
        video_id = urlc['id']
        # 'live' and 'cast' URLs are both handled as live channels.
        is_live = urlc['type'] in ('live', 'cast')
        stream_type = urlc['type'] + '/channel' if is_live else 'vod'
        token = self._generate_token()
        play_data = self._download_json(
            'https://platform.wim.tv/wimtv-server/api/public/%s/%s/play' % (
                stream_type, video_id), video_id,
            headers={'Authorization': 'Bearer %s' % token,
                     'Content-Type': 'application/json'},
            data=b'{}')

        formats = []
        for src in play_data.get('srcs') or []:
            stream_url = src.get('uniqueStreamer')
            if not stream_url:
                # Nothing to download from a source without an URL.
                continue
            mime_type = src.get('mimeType')
            if mime_type == 'application/x-mpegurl':
                formats.extend(
                    self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
            elif mime_type == 'video/flash':
                formats.append({
                    'format_id': 'rtmp',
                    'url': stream_url,
                    'ext': determine_ext(stream_url, 'flv'),
                    'rtmp_live': is_live,
                })
        # Tolerate a response without 'resource' instead of failing with
        # an AttributeError on None.
        resource = play_data.get('resource') or {}
        thumb = self._generate_thumbnail(resource.get('thumbnailId'))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': resource.get('title') or resource.get('name'),
            'duration': parse_duration(resource.get('duration')),
            'formats': formats,
            'thumbnail': thumb,
            'is_live': is_live,
        }