]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/sonyliv.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / sonyliv.py
1 import datetime as dt
2 import itertools
3 import json
4 import math
5 import random
6 import time
7 import uuid
8
9 from .common import InfoExtractor
10 from ..networking.exceptions import HTTPError
11 from ..utils import (
12 ExtractorError,
13 int_or_none,
14 jwt_decode_hs256,
15 try_call,
16 )
17 from ..utils.traversal import traverse_obj
18
19
class SonyLIVIE(InfoExtractor):
    """Extractor for a single SonyLIV video (episode, movie, clip, trailer, ...)."""

    _VALID_URL = r'''(?x)
                     (?:
                        sonyliv:|
                        https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-
                     )
                     (?P<id>\d+)
                  '''
    _TESTS = [{
        'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
        'info_dict': {
            'title': 'Achaari Cheese Toast',
            'id': '1000022678',
            'ext': 'mp4',
            'upload_date': '20200411',
            'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
            'timestamp': 1586632091,
            'duration': 185,
            'season_number': 1,
            'series': 'Bachelors Delight',
            'episode_number': 1,
            'release_year': 2016,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
        'only_matching': True,
    }]
    _GEO_COUNTRIES = ['IN']
    # Headers sent with every API request. This is a class-level dict that is
    # mutated in place by _initialize_pre_login() and _perform_login(), so it
    # is shared by all instances of this class.
    _HEADERS = {}
    _LOGIN_HINT = 'Use "--username <mobile_number>" to login using OTP or "--username token --password <auth_token>" to login using auth token.'
    _NETRC_MACHINE = 'sonyliv'

    def _get_device_id(self):
        """Return a pseudo-random device ID.

        The ID is 32 lowercase hexadecimal characters built from the template
        below ('4' is kept literally, 'y' becomes one of 8/9/a/b), followed by
        a dash and the current Unix time in milliseconds.
        """
        seed = int(time.time() * 1000)
        chars = []
        for c in 'xxxxxxxxxxxx4xxxyxxxxxxxxxxxxxxx':
            n = int((seed + 16 * random.random()) % 16)
            seed = math.floor(seed / 16)
            if c == 'x':
                # n ranges over 0..15, so it must be rendered as a single hex digit
                chars.append(f'{n:x}')
            elif c == 'y':
                chars.append(f'{(3 & n) | 8:x}')
            else:
                chars.append(c)
        return f'{"".join(chars)}-{int(time.time() * 1000)}'

    @staticmethod
    def _get_timestamp():
        """Return the current UTC time as 'YYYY-MM-DDTHH:MM:SS.mmmZ'."""
        # %f yields microseconds (6 digits); drop the last 3 to get milliseconds
        return dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    def _perform_login(self, username, password):
        """Log in with an auth token or with a 10-digit mobile number and OTP.

        With username 'token' and a password that decodes as a JWT, the
        password is used directly as the authorization header. Otherwise the
        username must be a 10-digit number; an OTP is requested for it, the
        user is prompted for the code, and the returned access token is stored
        as the authorization header.

        Raises ExtractorError for an invalid username or when either OTP API
        call reports resultCode 'KO'.
        """
        self._HEADERS['device_id'] = self._get_device_id()
        self._HEADERS['content-type'] = 'application/json'

        if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
            self._HEADERS['authorization'] = password
            self.report_login()
            return
        if len(username) != 10 or not username.isdigit():
            raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')

        self.report_login()
        otp_request_json = self._download_json(
            'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2',
            None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({
                'mobileNumber': username,
                'channelPartnerID': 'MSMIND',
                'country': 'IN',
                'timestamp': self._get_timestamp(),
                'otpSize': 6,
                'loginType': 'REGISTERORSIGNIN',
                'isMobileMandatory': True,
            }).encode())
        if otp_request_json['resultCode'] == 'KO':
            raise ExtractorError(otp_request_json['message'], expected=True)

        otp_verify_json = self._download_json(
            'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2',
            None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({
                'channelPartnerID': 'MSMIND',
                'mobileNumber': username,
                'country': 'IN',
                'otp': self._get_tfa_info('OTP'),
                'dmaId': 'IN',
                'ageConfirmation': True,
                'timestamp': self._get_timestamp(),
                'isMobileMandatory': True,
            }).encode())
        if otp_verify_json['resultCode'] == 'KO':
            # Report the message of the verification response, not the OTP request
            raise ExtractorError(otp_verify_json['message'], expected=True)
        self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken']

    def _call_api(self, version, path, video_id):
        """Download an API endpoint and return the 'resultObj' of its JSON response.

        HTTP 406 with the subscription message is turned into a login-required
        error, HTTP 403 into a geo-restriction error or an ExtractorError that
        carries the server's message. Any other failure is re-raised unchanged.
        """
        try:
            return self._download_json(
                f'https://apiv2.sonyliv.com/AGL/{version}/A/ENG/WEB/{path}',
                video_id, headers=self._HEADERS)['resultObj']
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 406):
                raise
            # Parse the error body once and non-fatally, so that a malformed
            # body does not hide the original HTTP error
            message = traverse_obj(self._parse_json(
                e.cause.response.read().decode(), video_id, fatal=False), ('message', {str}))
            if e.cause.status == 406:
                if message == 'Please subscribe to watch this content':
                    self.raise_login_required(self._LOGIN_HINT, method=None)
                raise
            if message == 'Geoblocked Country':
                self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
            if message:
                raise ExtractorError(message)
            raise

    def _initialize_pre_login(self):
        """Fetch a security token and store it in the shared request headers."""
        self._HEADERS['security_token'] = self._call_api('1.4', 'ALL/GETTOKEN', None)

    def _real_extract(self, url):
        """Return the info dict (formats, subtitles and metadata) for one video URL."""
        video_id = self._match_id(url)
        content = self._call_api(
            '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
        if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
            self.report_drm(video_id)
        dash_url = content['videoURL']
        headers = {
            'x-playback-session-id': f'{uuid.uuid4().hex}-{int(time.time() * 1000)}',
        }
        formats = self._extract_mpd_formats(
            dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
        # The HLS manifest URL is derived from the DASH one by rewriting the URL
        formats.extend(self._extract_m3u8_formats(
            dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
            video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
        for f in formats:
            f.setdefault('http_headers', {}).update(headers)

        metadata = self._call_api(
            '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
        title = metadata['episodeTitle']

        subtitles = {}
        # `or []` also covers a 'subtitle' key that is present but null
        for sub in content.get('subtitle') or []:
            sub_url = sub.get('subtitleUrl')
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('subtitleLanguageName', 'ENG'), []).append({
                'url': sub_url,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': content.get('posterURL'),
            'description': metadata.get('longDescription') or metadata.get('shortDescription'),
            'timestamp': int_or_none(metadata.get('creationDate'), 1000),
            'duration': int_or_none(metadata.get('duration')),
            'season_number': int_or_none(metadata.get('season')),
            'series': metadata.get('title'),
            'episode_number': int_or_none(metadata.get('episodeNumber')),
            'release_year': int_or_none(metadata.get('year')),
            'subtitles': subtitles,
        }
184
185
class SonyLIVSeriesIE(InfoExtractor):
    """Playlist extractor for SonyLIV show pages; yields every episode of every season."""

    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
        'playlist_mincount': 452,
        'info_dict': {
            'id': '1700000091',
        },
    }, {
        'url': 'https://www.sonyliv.com/shows/beyhadh-1700000007/',
        'playlist_mincount': 358,
        'info_dict': {
            'id': '1700000007',
        },
    }]
    _API_BASE = 'https://apiv2.sonyliv.com/AGL'

    def _entries(self, show_id):
        """Yield a url_result for each episode of the show, season by season.

        Episodes are requested in pages of 100 ordered by episode number;
        paging for a season stops at the first page that has no entries.
        """
        request_headers = {
            'Accept': 'application/json, text/plain, */*',
            'Referer': 'https://www.sonyliv.com',
        }
        token_json = self._download_json(
            f'{self._API_BASE}/1.4/A/ENG/WEB/ALL/GETTOKEN', show_id,
            'Downloading security token', headers=request_headers)
        request_headers['security_token'] = token_json['resultObj']

        # Same traversal for seasons and episodes: nested containers with an integer-like id
        containers_path = (
            'resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id']))

        series_json = self._download_json(
            f'{self._API_BASE}/1.9/R/ENG/WEB/IN/DL/DETAIL/{show_id}', show_id,
            'Downloading series JSON', headers=request_headers, query={
                'kids_safe': 'false',
                'from': '0',
                'to': '49',
            })
        for season in traverse_obj(series_json, containers_path):
            season_id = str(season['id'])
            note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
            for page_num in itertools.count(1):
                first = (page_num - 1) * 100
                page_json = self._download_json(
                    f'{self._API_BASE}/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{season_id}',
                    season_id, f'Downloading {note} page {page_num} JSON',
                    headers=request_headers, query={
                        'from': str(first),
                        'to': str(first + 99),
                        'orderBy': 'episodeNumber',
                        'sortOrder': 'asc',
                    })
                episodes = traverse_obj(page_json, containers_path)
                if not episodes:
                    break
                for episode in episodes:
                    video_id = str(episode['id'])
                    yield self.url_result(f'sonyliv:{video_id}', SonyLIVIE, video_id)

    def _real_extract(self, url):
        """Return a playlist of all episodes for the show identified by the URL."""
        show_id = self._match_id(url)
        entries = self._entries(show_id)
        return self.playlist_result(entries, playlist_id=show_id)