]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/goplay.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / goplay.py
CommitLineData
fada8272
JJ
1import base64
2import binascii
c305a25c 3import datetime as dt
fada8272
JJ
4import hashlib
5import hmac
6import json
7import os
8
9from .common import InfoExtractor
10from ..utils import (
11 ExtractorError,
12 traverse_obj,
13 unescapeHTML,
14)
15
16
class GoPlayIE(InfoExtractor):
    """Extractor for goplay.be video pages.

    Every API request is sent with a bearer token obtained through
    ``_perform_login``, so extraction is refused when no credentials
    were supplied.
    """

    _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)'

    _NETRC_MACHINE = 'goplay'

    _TESTS = [{
        'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
        'info_dict': {
            'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
            'ext': 'mp4',
            'title': 'S3 - Aflevering 2',
            'series': 'De Container Cup',
            'season': 'Season 3',
            'season_number': 3,
            'episode': 'Episode 2',
            'episode_number': 2,
        },
        'skip': 'This video is only available for registered users',
    }, {
        'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
        'info_dict': {
            'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
            'ext': 'mp4',
            'title': 'A Family for the Holidays',
        },
        'skip': 'This video is only available for registered users',
    }, {
        'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
        'info_dict': {
            'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
            'ext': 'mp4',
            'title': 'S11 - Aflevering 1',
            'episode': 'Episode 1',
            'series': 'De Mol',
            'season_number': 11,
            'episode_number': 1,
            'season': 'Season 11',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is only available for registered users',
    }]

    # Cognito ID token; populated by _perform_login and sent as the bearer token.
    _id_token = None

    def _perform_login(self, username, password):
        """Authenticate against the AWS Cognito pool and keep the ID token."""
        self.report_login()
        aws = AwsIdp(ie=self, pool_id='eu-west-1_dViSsKM5Y', client_id='6s1h851s8uplco5h6mqh1jac8m')
        # authenticate() returns (id_token, refresh_token); only the former is used.
        self._id_token, _ = aws.authenticate(username=username, password=password)

    def _real_initialize(self):
        """Refuse to continue when no login took place."""
        if not self._id_token:
            # raise_login_required() raises by itself; no outer `raise` is needed.
            self.raise_login_required(method='password')

    def _real_extract(self, url):
        """Build the info dict for a goplay.be video page.

        :raises ExtractorError: when the page carries no usable video data,
            when no episode matches the URL, or when the API response offers
            neither HLS manifests nor an SSAI stream.
        """
        # group(0) is the URL as matched by _VALID_URL, i.e. without any
        # "#fragment" ([^/#]+ stops at '#'); it is compared against the
        # episodes' pageInfo URL below.
        url, display_id = self._match_valid_url(url).group(0, 'display_id')
        webpage = self._download_webpage(url, display_id)
        video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
        video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
        if not video_data:
            raise ExtractorError('Unable to find video data in the page')

        movie = video_data.get('movie')
        if movie:
            video_id = movie['videoUuid']
            info_dict = {
                'title': movie.get('title'),
            }
        else:
            # Pick the first episode, across all playlists, whose page URL is this one.
            episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
            if not episode:
                raise ExtractorError('Unable to find an episode matching the requested URL')
            video_id = episode['videoUuid']
            info_dict = {
                'title': episode.get('episodeTitle'),
                'series': traverse_obj(episode, ('program', 'title')),
                'season_number': episode.get('seasonNumber'),
                'episode_number': episode.get('episodeNumber'),
            }

        api = self._download_json(
            f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
            video_id, headers={
                'Authorization': f'Bearer {self._id_token}',
                **self.geo_verification_headers(),
            })

        if 'manifestUrls' in api:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')

        else:
            if 'ssai' not in api:
                raise ExtractorError('expecting Google SSAI stream')

            ssai_content_source_id = api['ssai']['contentSourceID']
            ssai_video_id = api['ssai']['videoID']

            dai = self._download_json(
                f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
                video_id, data=b'{"api-key":"null"}',
                headers={'content-type': 'application/json'})

            periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)

            # skip pre-roll and mid-roll ads
            periods = [p for p in periods if '-ad-' not in p['id']]

            formats, subtitles = self._merge_mpd_periods(periods)

        info_dict.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        })
        return info_dict
130
131
132# Taken from https://github.com/add-ons/plugin.video.viervijfzes/blob/master/resources/lib/viervijfzes/auth_awsidp.py
133# Released into Public domain by https://github.com/michaelarnauts
134
class InvalidLoginException(ExtractorError):
    """Raised when the supplied login credentials are rejected."""
137
138
class AuthenticationException(ExtractorError):
    """Raised when the login procedure itself fails."""
141
142
class AwsIdp:
    """AWS Identity Provider client implementing the ``USER_SRP_AUTH`` flow."""

    def __init__(self, ie, pool_id, client_id):
        """
        :param InfoExtractor ie: The extractor that instantiated this class.
        :param str pool_id: The AWS user pool to connect to (format: <region>_<poolid>).
                            E.g.: eu-west-1_aLkOfYN3T
        :param str client_id: The client application ID (the ID of the application connecting)

        :raises ValueError: If ``pool_id`` contains no underscore.
        """

        self.ie = ie

        self.pool_id = pool_id
        if '_' not in self.pool_id:
            raise ValueError('Invalid pool_id format. Should be <region>_<poolid>.')

        self.client_id = client_id
        self.region = self.pool_id.split('_')[0]
        self.url = f'https://cognito-idp.{self.region}.amazonaws.com/'

        # Initialize the values
        # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L22
        self.n_hex = (
            'FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1'
            '29024E088A67CC74020BBEA63B139B22514A08798E3404DD'
            'EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245'
            'E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED'
            'EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D'
            'C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F'
            '83655D23DCA3AD961C62F356208552BB9ED529077096966D'
            '670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B'
            'E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9'
            'DE2BCBF6955817183995497CEA956AE515D2261898FA0510'
            '15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64'
            'ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7'
            'ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B'
            'F12FFA06D98A0864D87602733EC86A64521F2B18177B200C'
            'BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31'
            '43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF')

        # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L49
        self.g_hex = '2'
        self.info_bits = bytearray('Caldera Derived Key', 'utf-8')

        self.big_n = self.__hex_to_long(self.n_hex)
        self.g = self.__hex_to_long(self.g_hex)
        # k = H(00 || N || 0 || g), with every value taken as a hex string
        self.k = self.__hex_to_long(self.__hex_hash('00' + self.n_hex + '0' + self.g_hex))
        self.small_a_value = self.__generate_random_small_a()
        self.large_a_value = self.__calculate_a()

    def authenticate(self, username, password):
        """ Authenticate with a username and password.

        :param str username: The username to use.
        :param str password: The password.

        :return: The ``(IdToken, RefreshToken)`` pair of the authentication result.
        :rtype: tuple

        :raises AuthenticationException: If the server does not answer with a
            PASSWORD_VERIFIER challenge, or returns no authentication result.
        :raises InvalidLoginException: If the challenge response carries a message.
        """
        # Step 1: First initiate an authentication request
        auth_data_dict = self.__get_authentication_request(username)
        auth_data = json.dumps(auth_data_dict).encode()
        auth_headers = {
            'X-Amz-Target': 'AWSCognitoIdentityProviderService.InitiateAuth',
            'Accept-Encoding': 'identity',
            'Content-Type': 'application/x-amz-json-1.1',
        }
        auth_response_json = self.ie._download_json(
            self.url, None, data=auth_data, headers=auth_headers,
            note='Authenticating username', errnote='Invalid username')
        challenge_parameters = auth_response_json.get('ChallengeParameters')

        challenge_name = auth_response_json.get('ChallengeName')
        if challenge_name != 'PASSWORD_VERIFIER':
            # A response naming a different challenge need not contain a
            # 'message' key, so fall back to a descriptive error instead of
            # failing with a KeyError.
            raise AuthenticationException(
                auth_response_json.get('message')
                or f'Unexpected authentication challenge: {challenge_name!r}')

        # Step 2: Respond to the Challenge with a valid ChallengeResponse
        challenge_request = self.__get_challenge_response_request(challenge_parameters, password)
        challenge_data = json.dumps(challenge_request).encode()
        challenge_headers = {
            'X-Amz-Target': 'AWSCognitoIdentityProviderService.RespondToAuthChallenge',
            'Content-Type': 'application/x-amz-json-1.1',
        }
        auth_response_json = self.ie._download_json(
            self.url, None, data=challenge_data, headers=challenge_headers,
            note='Authenticating password', errnote='Invalid password')

        if 'message' in auth_response_json:
            raise InvalidLoginException(auth_response_json['message'])

        authentication_result = auth_response_json.get('AuthenticationResult')
        if not authentication_result:
            # Without a result there are no tokens to hand back.
            raise AuthenticationException('No authentication result received')
        return (
            authentication_result['IdToken'],
            authentication_result['RefreshToken'],
        )

    def __get_authentication_request(self, username):
        """ Create the payload of the initial authentication request.

        :param str username: The username to use

        :return: A full Authorization request.
        :rtype: dict
        """
        return {
            'AuthParameters': {
                'USERNAME': username,
                'SRP_A': self.__long_to_hex(self.large_a_value),
            },
            'AuthFlow': 'USER_SRP_AUTH',
            'ClientId': self.client_id,
        }

    def __get_challenge_response_request(self, challenge_parameters, password):
        """ Create a Challenge Response Request object.

        :param dict[str,str|int] challenge_parameters: The parameters for the challenge.
        :param str password: The password.

        :return: A valid and full request data object to use as a response for a challenge.
        :rtype: dict

        :raises ValueError: If the server's SRP values fail the safety checks.
        """
        user_id = challenge_parameters['USERNAME']
        user_id_for_srp = challenge_parameters['USER_ID_FOR_SRP']
        srp_b = challenge_parameters['SRP_B']
        salt = challenge_parameters['SALT']
        secret_block = challenge_parameters['SECRET_BLOCK']

        timestamp = self.__get_current_timestamp()

        # Get a HKDF key for the password, SrpB and the Salt
        hkdf = self.__get_hkdf_key_for_password(
            user_id_for_srp,
            password,
            self.__hex_to_long(srp_b),
            salt,
        )
        secret_block_bytes = base64.standard_b64decode(secret_block)

        # the message is a combo of the pool_id, provided SRP userId, the Secret and Timestamp
        msg = (
            bytearray(self.pool_id.split('_')[1], 'utf-8')
            + bytearray(user_id_for_srp, 'utf-8')
            + bytearray(secret_block_bytes)
            + bytearray(timestamp, 'utf-8'))
        hmac_obj = hmac.new(hkdf, msg, digestmod=hashlib.sha256)
        signature_string = base64.standard_b64encode(hmac_obj.digest()).decode('utf-8')
        return {
            'ChallengeResponses': {
                'USERNAME': user_id,
                'TIMESTAMP': timestamp,
                'PASSWORD_CLAIM_SECRET_BLOCK': secret_block,
                'PASSWORD_CLAIM_SIGNATURE': signature_string,
            },
            'ChallengeName': 'PASSWORD_VERIFIER',
            'ClientId': self.client_id,
        }

    def __get_hkdf_key_for_password(self, username, password, server_b_value, salt):
        """ Calculates the final hkdf based on computed S value, and computed U value and the key.

        :param str username: Username.
        :param str password: Password.
        :param int server_b_value: Server B value.
        :param int|str salt: Generated salt (number or hex string).

        :return Computed HKDF value (16 bytes).
        :rtype: bytes

        :raises ValueError: If B is a multiple of N, or if U is zero.
        """

        # SRP safety check: the client must abort when B % N == 0
        if server_b_value % self.big_n == 0:
            raise ValueError('B cannot be zero.')

        u_value = self.__calculate_u(self.large_a_value, server_b_value)
        if u_value == 0:
            raise ValueError('U cannot be zero.')
        username_password = '{}{}:{}'.format(self.pool_id.split('_')[1], username, password)
        username_password_hash = self.__hash_sha256(username_password.encode())

        # x = H(salt || H(poolid || username || ':' || password))
        x_value = self.__hex_to_long(self.__hex_hash(self.__pad_hex(salt) + username_password_hash))
        g_mod_pow_xn = pow(self.g, x_value, self.big_n)
        # S = (B - k * g^x) ^ (a + u * x) % N
        int_value2 = server_b_value - self.k * g_mod_pow_xn
        s_value = pow(int_value2, self.small_a_value + u_value * x_value, self.big_n)
        return self.__compute_hkdf(
            bytearray.fromhex(self.__pad_hex(s_value)),
            bytearray.fromhex(self.__pad_hex(self.__long_to_hex(u_value))),
        )

    def __compute_hkdf(self, ikm, salt):
        """ Standard hkdf algorithm

        :param {Buffer} ikm Input key material.
        :param {Buffer} salt Salt value.
        :return {Buffer} Strong key material.
        """

        # Extract step, followed by a single expand round (counter byte 0x01)
        prk = hmac.new(salt, ikm, hashlib.sha256).digest()
        info_bits_update = self.info_bits + bytearray(chr(1), 'utf-8')
        hmac_hash = hmac.new(prk, info_bits_update, hashlib.sha256).digest()
        # Only the first 16 bytes are used as key
        return hmac_hash[:16]

    def __calculate_u(self, big_a, big_b):
        """ Calculate the client's value U which is the hash of A and B

        :param int big_a: Large A value.
        :param int big_b: Server B value.

        :return Computed U value.
        :rtype: int
        """

        u_hex_hash = self.__hex_hash(self.__pad_hex(big_a) + self.__pad_hex(big_b))
        return self.__hex_to_long(u_hex_hash)

    def __generate_random_small_a(self):
        """ Helper function to generate a random big integer

        :return a random value, reduced modulo N.
        :rtype: int
        """
        random_long_int = self.__get_random(128)
        return random_long_int % self.big_n

    def __calculate_a(self):
        """ Calculate the client's public value A = g^a%N with the generated random number a

        :return Computed large A.
        :rtype: int

        :raises ValueError: If A is a multiple of N.
        """

        big_a = pow(self.g, self.small_a_value, self.big_n)
        # safety check
        if (big_a % self.big_n) == 0:
            raise ValueError('Safety check for A failed')
        return big_a

    @staticmethod
    def __long_to_hex(long_num):
        """ Format an integer as lowercase hex without prefix. """
        return f'{long_num:x}'

    @staticmethod
    def __hex_to_long(hex_string):
        """ Parse a hex string (or hex bytes) into an integer. """
        return int(hex_string, 16)

    @staticmethod
    def __hex_hash(hex_string):
        """ SHA-256 hex digest of the bytes encoded by a hex string. """
        return AwsIdp.__hash_sha256(bytearray.fromhex(hex_string))

    @staticmethod
    def __hash_sha256(buf):
        """AuthenticationHelper.hash"""
        # hexdigest() of SHA-256 is always 64 characters, so no padding is needed
        return hashlib.sha256(buf).hexdigest()

    @staticmethod
    def __pad_hex(long_int):
        """ Converts a Long integer (or hex string) to hex format padded with zeroes for hashing

        :param int|str long_int: Number or string to pad.

        :return Padded hex string.
        :rtype: str
        """

        if not isinstance(long_int, str):
            hash_str = AwsIdp.__long_to_hex(long_int)
        else:
            hash_str = long_int
        if len(hash_str) % 2 == 1:
            # odd number of digits: complete the leading byte
            hash_str = f'0{hash_str}'
        elif hash_str[0] in '89ABCDEFabcdef':
            # high bit of the leading byte is set: prepend a zero byte
            hash_str = f'00{hash_str}'
        return hash_str

    @staticmethod
    def __get_random(nbytes):
        """ Return a random integer built from ``nbytes`` bytes of os.urandom. """
        random_hex = binascii.hexlify(os.urandom(nbytes))
        return AwsIdp.__hex_to_long(random_hex)

    @staticmethod
    def __get_current_timestamp():
        """ Creates a timestamp with the correct English format.

        :return: timestamp in format 'Sun Jan 27 19:00:04 UTC 2019'
        :rtype: str
        """

        # We need US only data, so we cannot just do a strftime:
        # Sun Jan 27 19:00:04 UTC 2019
        months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

        time_now = dt.datetime.now(dt.timezone.utc)
        # Day and month names come from the tables above; the day of the
        # month is deliberately not zero-padded.
        day_name = days[time_now.weekday()]
        month_name = months[time_now.month]
        return f'{day_name} {month_name} {time_now.day} {time_now:%H:%M:%S} UTC {time_now:%Y}'

    def __str__(self):
        return 'AWS IDP Client for:\nRegion: {}\nPoolId: {}\nAppId: {}'.format(
            self.region, self.pool_id.split('_')[1], self.client_id,
        )