]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/goplay.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / goplay.py
1 import base64
2 import binascii
3 import datetime as dt
4 import hashlib
5 import hmac
6 import json
7 import os
8
9 from .common import InfoExtractor
10 from ..utils import (
11 ExtractorError,
12 traverse_obj,
13 unescapeHTML,
14 )
15
16
class GoPlayIE(InfoExtractor):
    """Extractor for on-demand videos on goplay.be.

    A login is mandatory: the ID token returned by the AWS Cognito login is sent
    as bearer token to the GoPlay video API.
    """

    _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)'

    _NETRC_MACHINE = 'goplay'

    _TESTS = [{
        'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
        'info_dict': {
            'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
            'ext': 'mp4',
            'title': 'S3 - Aflevering 2',
            'series': 'De Container Cup',
            'season': 'Season 3',
            'season_number': 3,
            'episode': 'Episode 2',
            'episode_number': 2,
        },
        'skip': 'This video is only available for registered users',
    }, {
        'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
        'info_dict': {
            'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
            'ext': 'mp4',
            'title': 'A Family for the Holidays',
        },
        'skip': 'This video is only available for registered users',
    }, {
        'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
        'info_dict': {
            'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
            'ext': 'mp4',
            'title': 'S11 - Aflevering 1',
            'episode': 'Episode 1',
            'series': 'De Mol',
            'season_number': 11,
            'episode_number': 1,
            'season': 'Season 11',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is only available for registered users',
    }]

    # ID token obtained by _perform_login(); stays None until a login succeeded
    _id_token = None

    def _perform_login(self, username, password):
        """ Log in through AWS Cognito and remember the returned ID token. """
        self.report_login()
        aws = AwsIdp(ie=self, pool_id='eu-west-1_dViSsKM5Y', client_id='6s1h851s8uplco5h6mqh1jac8m')
        # authenticate() returns (id_token, refresh_token); only the ID token is used
        self._id_token, _ = aws.authenticate(username=username, password=password)

    def _real_initialize(self):
        """ Refuse to continue when no login was performed. """
        if not self._id_token:
            # NOTE(review): yt-dlp's raise_login_required() raises on its own,
            # so no outer `raise` is needed - confirm against InfoExtractor
            self.raise_login_required(method='password')

    def _real_extract(self, url):
        """ Build the info dict (metadata, formats, subtitles) for a GoPlay video page. """
        # Use the matched part of the URL only, i.e. without any '#fragment'
        url, display_id = self._match_valid_url(url).group(0, 'display_id')
        webpage = self._download_webpage(url, display_id)
        video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
        # 'data' may be absent; fall back to an empty dict so the lookups below stay safe
        video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data') or {}

        movie = video_data.get('movie')
        if movie:
            video_id = movie['videoUuid']
            info_dict = {
                'title': movie.get('title'),
            }
        else:
            # Pick the first episode (across all playlists) whose page URL is the requested one
            episode = traverse_obj(video_data, (
                'playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
            if not episode:
                raise ExtractorError('Unable to find the video metadata in the webpage')
            video_id = episode['videoUuid']
            info_dict = {
                'title': episode.get('episodeTitle'),
                'series': traverse_obj(episode, ('program', 'title')),
                'season_number': episode.get('seasonNumber'),
                'episode_number': episode.get('episodeNumber'),
            }

        api = self._download_json(
            f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
            video_id, headers={
                'Authorization': f'Bearer {self._id_token}',
                **self.geo_verification_headers(),
            })

        if 'manifestUrls' in api:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')

        else:
            if 'ssai' not in api:
                raise ExtractorError('expecting Google SSAI stream')

            ssai_content_source_id = api['ssai']['contentSourceID']
            ssai_video_id = api['ssai']['videoID']

            dai = self._download_json(
                f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
                video_id, data=b'{"api-key":"null"}',
                headers={'content-type': 'application/json'})

            periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)

            # skip pre-roll and mid-roll ads
            periods = [p for p in periods if '-ad-' not in p['id']]

            formats, subtitles = self._merge_mpd_periods(periods)

        info_dict.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        })
        return info_dict
130
131
132 # Taken from https://github.com/add-ons/plugin.video.viervijfzes/blob/master/resources/lib/viervijfzes/auth_awsidp.py
133 # Released into Public domain by https://github.com/michaelarnauts
134
# Raised by AwsIdp.authenticate() when the response to the password challenge carries an error message.
class InvalidLoginException(ExtractorError):
    """ The login credentials are invalid """
137
138
# Raised by AwsIdp.authenticate() when the login handshake does not proceed as expected,
# e.g. the server does not answer the first request with a PASSWORD_VERIFIER challenge.
class AuthenticationException(ExtractorError):
    """ Something went wrong while logging in """
141
142
class AwsIdp:
    """ AWS Identity Provider

    Small client for an AWS Cognito user pool that logs in with the
    USER_SRP_AUTH flow (Secure Remote Password): the password itself is never
    sent, only a signature derived from it.
    """

    def __init__(self, ie, pool_id, client_id):
        """
        :param InfoExtractor ie: The extractor that instantiated this class.
        :param str pool_id: The AWS user pool to connect to (format: <region>_<poolid>).
                            E.g.: eu-west-1_aLkOfYN3T
        :param str client_id: The client application ID (the ID of the application connecting)

        :raises ValueError: If pool_id does not contain an underscore.
        """

        self.ie = ie

        self.pool_id = pool_id
        if '_' not in self.pool_id:
            raise ValueError('Invalid pool_id format. Should be <region>_<poolid>.')

        self.client_id = client_id
        self.region = self.pool_id.split('_')[0]
        self.url = f'https://cognito-idp.{self.region}.amazonaws.com/'

        # Initialize the values
        # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L22
        self.n_hex = (
            'FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1'
            '29024E088A67CC74020BBEA63B139B22514A08798E3404DD'
            'EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245'
            'E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED'
            'EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D'
            'C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F'
            '83655D23DCA3AD961C62F356208552BB9ED529077096966D'
            '670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B'
            'E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9'
            'DE2BCBF6955817183995497CEA956AE515D2261898FA0510'
            '15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64'
            'ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7'
            'ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B'
            'F12FFA06D98A0864D87602733EC86A64521F2B18177B200C'
            'BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31'
            '43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF')

        # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L49
        self.g_hex = '2'
        self.info_bits = bytearray('Caldera Derived Key', 'utf-8')

        self.big_n = self.__hex_to_long(self.n_hex)
        self.g = self.__hex_to_long(self.g_hex)
        # k = H(00 | N | 0 | g), hashed over the hex-decoded bytes
        self.k = self.__hex_to_long(self.__hex_hash('00' + self.n_hex + '0' + self.g_hex))
        self.small_a_value = self.__generate_random_small_a()
        self.large_a_value = self.__calculate_a()

    def authenticate(self, username, password):
        """ Authenticate with a username and password.

        :param str username: The username to log in with.
        :param str password: The password belonging to the username.

        :return: The ID token and the refresh token.
        :rtype: tuple[str, str]

        :raises AuthenticationException: If the server does not reply as the SRP flow expects.
        :raises InvalidLoginException: If the password challenge response carries an error message.
        """
        # Step 1: First initiate an authentication request
        auth_data_dict = self.__get_authentication_request(username)
        auth_data = json.dumps(auth_data_dict).encode()
        auth_headers = {
            'X-Amz-Target': 'AWSCognitoIdentityProviderService.InitiateAuth',
            'Accept-Encoding': 'identity',
            'Content-Type': 'application/x-amz-json-1.1',
        }
        auth_response_json = self.ie._download_json(
            self.url, None, data=auth_data, headers=auth_headers,
            note='Authenticating username', errnote='Invalid username')
        challenge_parameters = auth_response_json.get('ChallengeParameters')

        challenge_name = auth_response_json.get('ChallengeName')
        if challenge_name != 'PASSWORD_VERIFIER':
            # The response is not guaranteed to contain a 'message'; never fail with a KeyError here
            raise AuthenticationException(
                auth_response_json.get('message') or f'Unexpected authentication challenge: {challenge_name!r}')
        if not challenge_parameters:
            raise AuthenticationException('The authentication challenge did not contain any parameters')

        # Step 2: Respond to the Challenge with a valid ChallengeResponse
        challenge_request = self.__get_challenge_response_request(challenge_parameters, password)
        challenge_data = json.dumps(challenge_request).encode()
        challenge_headers = {
            'X-Amz-Target': 'AWSCognitoIdentityProviderService.RespondToAuthChallenge',
            'Content-Type': 'application/x-amz-json-1.1',
        }
        auth_response_json = self.ie._download_json(
            self.url, None, data=challenge_data, headers=challenge_headers,
            note='Authenticating password', errnote='Invalid password')

        if 'message' in auth_response_json:
            raise InvalidLoginException(auth_response_json['message'])

        auth_result = auth_response_json.get('AuthenticationResult')
        if not auth_result:
            # E.g. the server asks for yet another challenge, which this client cannot answer
            raise AuthenticationException('The authentication response did not contain any tokens')
        return (
            auth_result['IdToken'],
            auth_result['RefreshToken'],
        )

    def __get_authentication_request(self, username):
        """ Create the request that initiates the SRP authentication.

        :param str username: The username to use

        :return: A full Authorization request.
        :rtype: dict
        """
        return {
            'AuthParameters': {
                'USERNAME': username,
                'SRP_A': self.__long_to_hex(self.large_a_value),
            },
            'AuthFlow': 'USER_SRP_AUTH',
            'ClientId': self.client_id,
        }

    def __get_challenge_response_request(self, challenge_parameters, password):
        """ Create a Challenge Response Request object.

        :param dict[str,str|int] challenge_parameters: The parameters for the challenge.
        :param str password: The password.

        :return: A valid and full request data object to use as a response for a challenge.
        :rtype: dict
        """
        user_id = challenge_parameters['USERNAME']
        user_id_for_srp = challenge_parameters['USER_ID_FOR_SRP']
        srp_b = challenge_parameters['SRP_B']
        salt = challenge_parameters['SALT']
        secret_block = challenge_parameters['SECRET_BLOCK']

        timestamp = self.__get_current_timestamp()

        # Get a HKDF key for the password, SrpB and the Salt
        hkdf = self.__get_hkdf_key_for_password(
            user_id_for_srp,
            password,
            self.__hex_to_long(srp_b),
            salt,
        )
        secret_block_bytes = base64.standard_b64decode(secret_block)

        # the message is a combo of the pool_id, provided SRP userId, the Secret and Timestamp
        msg = b''.join((
            self.pool_id.split('_')[1].encode(),
            user_id_for_srp.encode(),
            secret_block_bytes,
            timestamp.encode(),
        ))
        hmac_obj = hmac.new(hkdf, msg, digestmod=hashlib.sha256)
        signature_string = base64.standard_b64encode(hmac_obj.digest()).decode('utf-8')
        return {
            'ChallengeResponses': {
                'USERNAME': user_id,
                'TIMESTAMP': timestamp,
                'PASSWORD_CLAIM_SECRET_BLOCK': secret_block,
                'PASSWORD_CLAIM_SIGNATURE': signature_string,
            },
            'ChallengeName': 'PASSWORD_VERIFIER',
            'ClientId': self.client_id,
        }

    def __get_hkdf_key_for_password(self, username, password, server_b_value, salt):
        """ Calculates the final hkdf based on computed S value, and computed U value and the key.

        :param str username: Username.
        :param str password: Password.
        :param int server_b_value: Server B value.
        :param int|str salt: Generated salt (number or hex string).

        :return Computed HKDF value (16 bytes).
        :rtype: bytes

        :raises ValueError: If B is a multiple of N or if U is zero.
        """

        # SRP safety check, also performed by amazon-cognito-identity-js
        if server_b_value % self.big_n == 0:
            raise ValueError('B cannot be zero.')
        u_value = self.__calculate_u(self.large_a_value, server_b_value)
        if u_value == 0:
            raise ValueError('U cannot be zero.')
        username_password = f'{self.pool_id.split("_")[1]}{username}:{password}'
        username_password_hash = self.__hash_sha256(username_password.encode())

        # x = H(salt | H(poolid | username | ':' | password))
        x_value = self.__hex_to_long(self.__hex_hash(self.__pad_hex(salt) + username_password_hash))
        g_mod_pow_xn = pow(self.g, x_value, self.big_n)
        # S = (B - k * g^x) ^ (a + u * x) mod N
        int_value2 = server_b_value - self.k * g_mod_pow_xn
        s_value = pow(int_value2, self.small_a_value + u_value * x_value, self.big_n)
        return self.__compute_hkdf(
            bytearray.fromhex(self.__pad_hex(s_value)),
            bytearray.fromhex(self.__pad_hex(self.__long_to_hex(u_value))),
        )

    def __compute_hkdf(self, ikm, salt):
        """ Standard hkdf algorithm

        :param {Buffer} ikm Input key material.
        :param {Buffer} salt Salt value.
        :return {Buffer} Strong key material (first 16 bytes of the HMAC-SHA256 output).
        """

        prk = hmac.new(salt, ikm, hashlib.sha256).digest()
        # info | 0x01
        info_bits_update = self.info_bits + b'\x01'
        hmac_hash = hmac.new(prk, info_bits_update, hashlib.sha256).digest()
        return hmac_hash[:16]

    def __calculate_u(self, big_a, big_b):
        """ Calculate the client's value U which is the hash of A and B

        :param int big_a: Large A value.
        :param int big_b: Server B value.

        :return Computed U value.
        :rtype: int
        """

        u_hex_hash = self.__hex_hash(self.__pad_hex(big_a) + self.__pad_hex(big_b))
        return self.__hex_to_long(u_hex_hash)

    def __generate_random_small_a(self):
        """ Helper function to generate a random big integer

        :return a random value, reduced modulo N.
        :rtype: int
        """
        random_long_int = self.__get_random(128)
        return random_long_int % self.big_n

    def __calculate_a(self):
        """ Calculate the client's public value A = g^a%N with the generated random number a

        :return Computed large A.
        :rtype: int

        :raises ValueError: If A is a multiple of N.
        """

        big_a = pow(self.g, self.small_a_value, self.big_n)
        # safety check
        if (big_a % self.big_n) == 0:
            raise ValueError('Safety check for A failed')
        return big_a

    @staticmethod
    def __long_to_hex(long_num):
        """ Format an integer as lowercase hex without any prefix. """
        return f'{long_num:x}'

    @staticmethod
    def __hex_to_long(hex_string):
        """ Parse a hex string into an integer. """
        return int(hex_string, 16)

    @staticmethod
    def __hex_hash(hex_string):
        """ SHA256 (as hex string) of the bytes described by a hex string. """
        return AwsIdp.__hash_sha256(bytearray.fromhex(hex_string))

    @staticmethod
    def __hash_sha256(buf):
        """AuthenticationHelper.hash"""
        digest = hashlib.sha256(buf).hexdigest()
        # Left-pad to 64 hex characters
        return digest.zfill(64)

    @staticmethod
    def __pad_hex(long_int):
        """ Converts a Long integer (or hex string) to hex format padded with zeroes for hashing

        :param int|str long_int: Number or string to pad.

        :return Padded hex string.
        :rtype: str
        """

        if not isinstance(long_int, str):
            hash_str = AwsIdp.__long_to_hex(long_int)
        else:
            hash_str = long_int
        if len(hash_str) % 2 == 1:
            # Make the number of hex digits even
            hash_str = f'0{hash_str}'
        elif hash_str[0] in '89ABCDEFabcdef':
            # Highest bit is set: prepend a zero byte
            hash_str = f'00{hash_str}'
        return hash_str

    @staticmethod
    def __get_random(nbytes):
        """ Return a random non-negative integer built from nbytes bytes of os.urandom(). """
        return int.from_bytes(os.urandom(nbytes), 'big')

    @staticmethod
    def __get_current_timestamp(now=None):
        """ Creates a timestamp with the correct English format.

        :param datetime.datetime now: Moment to format, already expressed in UTC.
                                      Defaults to the current time.

        :return: timestamp in format 'Sun Jan 27 19:00:04 UTC 2019'
        :rtype: str
        """

        # We need US only data, so we cannot just do a strftime:
        # Sun Jan 27 19:00:04 UTC 2019
        months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

        time_now = dt.datetime.now(dt.timezone.utc) if now is None else now
        # Day and month names are filled in by hand; the day of the month is not zero-padded
        format_string = f'{days[time_now.weekday()]} {months[time_now.month]} {time_now.day} %H:%M:%S UTC %Y'
        return time_now.strftime(format_string)

    def __str__(self):
        return f'AWS IDP Client for:\nRegion: {self.region}\nPoolId: {self.pool_id.split("_")[1]}\nAppId: {self.client_id}'