]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/laola1tv.py
[ie/box] Fix formats extraction (#8649)
[yt-dlp.git] / yt_dlp / extractor / laola1tv.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 unified_strdate,
8 urlencode_postdata,
9 xpath_element,
10 xpath_text,
11 update_url_query,
12 js_to_json,
13 )
14
15
class Laola1TvEmbedIE(InfoExtractor):
    """Extractor for the laola1.tv ``titanplayer.php`` embed page.

    The embed page carries a ``flashvars`` object; its values are used to
    query ``hd_video.php`` for the video metadata, after which a token
    document is fetched and turned into Akamai formats.
    """
    IE_NAME = 'laola1tv:embed'
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
    _TESTS = [{
        # flashvars.premium = "false";
        'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
        'info_dict': {
            'id': '708065',
            'ext': 'mp4',
            'title': 'MA Long CHN - FAN Zhendong CHN',
            'uploader': 'ITTF - International Table Tennis Federation',
            'upload_date': '20161211',
        },
    }]

    def _extract_token_url(self, stream_access_url, video_id, data):
        """Return the first stream-access URL granted for *video_id*.

        *data* is serialized with ``json.dumps`` and sent as the request
        body, so it must be JSON-serializable (not ``bytes``).
        A protocol-relative *stream_access_url* is resolved against https.
        """
        return self._download_json(
            self._proto_relative_url(stream_access_url, 'https:'), video_id,
            headers={
                'Content-Type': 'application/json',
            }, data=json.dumps(data).encode())['data']['stream-access'][0]

    def _extract_formats(self, token_url, video_id):
        """Download the token document at *token_url* and return the formats.

        Raises ExtractorError when the document has no ``<token>`` element
        or when the token status is anything other than ``'0'``.
        """
        token_doc = self._download_xml(
            token_url, video_id, 'Downloading token',
            headers=self.geo_verification_headers())

        # fatal=True: a missing <token> element becomes an ExtractorError
        # instead of an AttributeError on None.
        token_attrib = xpath_element(
            token_doc, './/token', 'token', fatal=True).attrib

        if token_attrib.get('status') != '0':
            # .get() so a missing comment does not mask the token error
            raise ExtractorError(
                'Token error: %s' % token_attrib.get('comment'), expected=True)

        # The auth value is appended as the hdnea query parameter
        return self._extract_akamai_formats(
            '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
            video_id)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        flash_vars = self._search_regex(
            r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')

        def get_flashvar(x, *args, **kwargs):
            # First look for `x: "value"` inside the flashvars object ...
            flash_var = self._search_regex(
                r'%s\s*:\s*"([^"]+)"' % x,
                flash_vars, x, default=None)
            if not flash_var:
                # ... then fall back to `flashvars.x = "value"` or a bare
                # `x = "value"` assignment anywhere in the page.
                flash_var = self._search_regex([
                    r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
                    r'%s\s*=\s*"([^"]+)"' % x],
                    webpage, x, *args, **kwargs)
            return flash_var

        hd_doc = self._download_xml(
            'http://www.laola1.tv/server/hd_video.php', video_id, query={
                'play': get_flashvar('streamid'),
                'partner': get_flashvar('partnerid'),
                'portal': get_flashvar('portalid'),
                'lang': get_flashvar('sprache'),
                'v5ident': '',
            })

        def _v(x, **k):
            # Text of a child element of <video> in the hd_video document
            return xpath_text(hd_doc, './/video/' + x, **k)

        title = _v('title', fatal=True)

        # NOTE(review): any non-empty value takes the first branch, including
        # the string "false" mentioned in the test above - confirm intended.
        premium = get_flashvar('premium', default=None)
        if premium:
            token_url = update_url_query(
                _v('url', fatal=True), {
                    'timestamp': get_flashvar('timestamp'),
                    'auth': get_flashvar('auth'),
                })
        else:
            # The payload is JSON-encoded by _extract_token_url, so it must
            # not be pre-encoded to bytes (json.dumps rejects bytes with
            # TypeError). A missing or empty element yields an empty list.
            # NOTE(review): assumes the endpoint accepts a plain list of
            # subscription ids - confirm against the live API.
            required_abos = [
                abo for abo in (_v('req_liga_abos') or '').split(',') if abo]
            stream_access_url = update_url_query(
                'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {
                    'videoId': _v('id'),
                    'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
                    'label': _v('label'),
                    'area': _v('area'),
                })
            token_url = self._extract_token_url(
                stream_access_url, video_id, required_abos)

        formats = self._extract_formats(token_url, video_id)

        categories_str = _v('meta_sports')
        categories = categories_str.split(',') if categories_str else []
        is_live = _v('islive') == 'true'

        return {
            'id': video_id,
            'title': title,
            'upload_date': unified_strdate(_v('time_date')),
            'uploader': _v('meta_organisation'),
            'categories': categories,
            'is_live': is_live,
            'formats': formats,
        }
118
119
class Laola1TvBaseIE(Laola1TvEmbedIE):  # XXX: Do not subclass from concrete IE
    """Shared extraction logic for pages that embed a ``conf = {...}`` object.

    Subclasses supply ``IE_NAME`` and ``_VALID_URL`` and call
    ``_extract_video`` from ``_real_extract``.
    """

    def _extract_video(self, url):
        """Return the info dict for the video page at *url*.

        Raises ExtractorError when the page reports a finished live stream
        or when the player config contains an ``error`` entry.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        if 'Dieser Livestream ist bereits beendet.' in webpage:
            raise ExtractorError('This live stream has already finished.', expected=True)

        # The shareurl entry is stripped from the JS object before it is
        # converted to JSON.
        conf = self._parse_json(self._search_regex(
            r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
            display_id,
            transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
        video_id = conf['videoid']

        config = self._download_json(conf['configUrl'], video_id, query={
            'videoid': video_id,
            'partnerid': conf['partnerid'],
            'language': conf.get('language', ''),
            'portal': conf.get('portalid', ''),
        })
        error = config.get('error')
        if error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        video_data = config['video']
        title = video_data['title']
        is_live = video_data.get('isLivestream') and video_data.get('isLive')
        # metaInformation is optional metadata: fall back to an empty dict
        # so a missing or null entry does not raise AttributeError below.
        meta = video_data.get('metaInformation') or {}
        sports = meta.get('sports')
        categories = sports.split(',') if sports else []

        token_url = self._extract_token_url(
            video_data['streamAccess'], video_id,
            video_data['abo']['required'])

        formats = self._extract_formats(token_url, video_id)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('image'),
            'categories': categories,
            'formats': formats,
            'is_live': is_live,
        }
167
168
class Laola1TvIE(Laola1TvBaseIE):
    """Extractor for regular laola1.tv video and livestream pages."""

    IE_NAME = 'laola1tv'
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'

    _TESTS = [
        {
            'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
            'info_dict': {
                'id': '227883',
                'display_id': 'straubing-tigers-koelner-haie',
                'ext': 'flv',
                'title': 'Straubing Tigers - Kölner Haie',
                'upload_date': '20140912',
                'is_live': False,
                'categories': ['Eishockey'],
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
            'info_dict': {
                'id': '464602',
                'display_id': 'straubing-tigers-koelner-haie',
                'ext': 'flv',
                'title': 'Straubing Tigers - Kölner Haie',
                'upload_date': '20160129',
                'is_live': False,
                'categories': ['Eishockey'],
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
            'info_dict': {
                'id': '487850',
                'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
                'ext': 'flv',
                'title': 'Belogorie BELGOROD - TRENTINO Diatec',
                'upload_date': '20160322',
                'uploader': 'CEV - Europäischer Volleyball Verband',
                'is_live': True,
                'categories': ['Volleyball'],
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This live stream has already finished.',
        },
    ]

    def _real_extract(self, url):
        # All of the work lives in the shared base-class helper.
        info = self._extract_video(url)
        return info
221
222
class EHFTVIE(Laola1TvBaseIE):
    """Extractor for ehftv.com video pages, using the shared page logic."""

    IE_NAME = 'ehftv'
    _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'

    _TESTS = [
        {
            'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
            'info_dict': {
                'id': '1166761',
                'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
                'ext': 'mp4',
                'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
                'is_live': False,
                'categories': ['Handball'],
            },
            'params': {
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        # All of the work lives in the shared base-class helper.
        info = self._extract_video(url)
        return info
244
245
class ITTFIE(InfoExtractor):
    """Redirect tv.ittf.com video pages to the laola1.tv embed extractor."""

    _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
        'only_matching': True,
    }

    def _real_extract(self, url):
        # Build the titanplayer URL for this video id with fixed player
        # parameters, then hand it over to Laola1TvEmbedIE.
        player_query = {
            'videoid': self._match_id(url),
            'type': 'V',
            'lang': 'en',
            'portal': 'int',
            'customer': 1024,
        }
        embed_url = update_url_query(
            'https://www.laola1.tv/titanplayer.php', player_query)
        return self.url_result(embed_url, Laola1TvEmbedIE.ie_key())