]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/zee5.py
[youtube] Fix error reporting of "Incomplete data"
[yt-dlp.git] / yt_dlp / extractor / zee5.py
1 import json
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import (
6 ExtractorError,
7 int_or_none,
8 parse_age_limit,
9 str_or_none,
10 try_get,
11 unified_strdate,
12 unified_timestamp,
13 url_or_none,
14 )
15
16
class Zee5IE(InfoExtractor):
    """Extractor for a single ZEE5 video (movie or episode).

    Accepts ``zee5:<id>`` pseudo-URLs as well as zee5.com movie / tv-show /
    kids / web-series / zee5originals pages (see ``_VALID_URL``).
    """

    _VALID_URL = r'''(?x)
                     (?:
                        zee5:|
                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                        (?:
                            (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3}
                            |movies/[^#/?]+
                        )/(?P<display_id>[^#/?]+)/
                     )
                     (?P<id>[^#/?]+)/?(?:$|[?#])
                     '''
    _TESTS = [{
        'url': 'https://www.zee5.com/movies/details/adavari-matalaku-ardhale-verule/0-0-movie_1143162669',
        'info_dict': {
            'id': '0-0-movie_1143162669',
            'ext': 'mp4',
            'display_id': 'adavari-matalaku-ardhale-verule',
            'title': 'Adavari Matalaku Ardhale Verule',
            'duration': 9360,
            'description': compat_str,
            'alt_title': 'Adavari Matalaku Ardhale Verule',
            'uploader': 'Zee Entertainment Enterprises Ltd',
            'release_date': '20070427',
            'upload_date': '20070427',
            'timestamp': 1177632000,
            'thumbnail': r're:^https?://.*\.jpg$',
            'episode_number': 0,
            'episode': 'Episode 0',
            'tags': list
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899/yoga-se-hoga-bandbudh-aur-budbak/0-1-239839',
        'info_dict': {
            'id': '0-1-239839',
            'ext': 'mp4',
            'display_id': 'yoga-se-hoga-bandbudh-aur-budbak',
            'title': 'Yoga Se Hoga-Bandbudh aur Budbak',
            'duration': 659,
            'description': compat_str,
            'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak',
            'uploader': 'Zee Entertainment Enterprises Ltd',
            'release_date': '20150101',
            'upload_date': '20150101',
            'timestamp': 1420070400,
            'thumbnail': r're:^https?://.*\.jpg$',
            'series': 'Bandbudh Aur Budbak',
            'season_number': 1,
            'episode_number': 1,
            'episode': 'Episode 1',
            'season': 'Season 1',
            'tags': list,
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
        'only_matching': True
    }]
    # Playback-details endpoint; formatted with (content_id, device_id).
    _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
    # Fixed device id; also sent as the guest token when no user token is set.
    _DEVICE_ID = '1q70TH8Wz0wTyw4buVgg000000000000'
    # Set by _perform_login(); None means requests are made as a guest.
    _USER_TOKEN = None
    _LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
    _NETRC_MACHINE = 'zee5'
    _GEO_COUNTRIES = ['IN']

    def _perform_login(self, username, password):
        """Obtain a user token and store it in ``self._USER_TOKEN``.

        Two modes are supported:

        * ``username`` is a 10-digit number: an OTP is requested for that
          number, the user is prompted for it, and it is exchanged for a token.
        * ``username`` is ``token`` (case-insensitive): ``password`` is used
          as the user token directly.

        Raises:
            ExtractorError: if the OTP cannot be sent or verified, or if the
                credentials match neither mode (the login hint is shown).
        """
        if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
            self.report_login()
            otp_request_json = self._download_json(
                f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}',
                None, note='Sending OTP')
            # A non-zero code means the OTP was not sent; surface the API message.
            if otp_request_json['code'] != 0:
                raise ExtractorError(otp_request_json['message'], expected=True)
            self.to_screen(otp_request_json['message'])

            otp_code = self._get_tfa_info('OTP')
            otp_verify_json = self._download_json(
                f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web',
                None, note='Verifying OTP', fatal=False)
            if not otp_verify_json:
                raise ExtractorError('Unable to verify OTP.', expected=True)
            self._USER_TOKEN = otp_verify_json.get('token')
            if not self._USER_TOKEN:
                # Report the *verification* response; the request response
                # only carries the earlier "OTP sent" style message.
                raise ExtractorError(
                    otp_verify_json.get('message') or 'Unable to verify OTP.', expected=True)
        # NOTE(review): 1198 is presumably a sanity bound on the token length - confirm.
        # `password or ''` keeps a missing password from raising TypeError.
        elif username.lower() == 'token' and len(password or '') > 1198:
            self._USER_TOKEN = password
        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

    def _real_extract(self, url):
        """Fetch playback details for ``url`` and return the info dict.

        Raises:
            ExtractorError: if the content's business type contains
                ``premium`` (reported as DRM protected).
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        access_token_request = self._download_json(
            'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
            video_id, note='Downloading access token')

        # Request body of the details call: platform token plus either the
        # logged-in user's bearer token or the guest (device) token.
        data = {
            'x-access-token': access_token_request['token']
        }
        if self._USER_TOKEN:
            data['Authorization'] = f'bearer {self._USER_TOKEN}'
        else:
            data['X-Z5-Guest-Token'] = self._DEVICE_ID

        json_data = self._download_json(
            self._DETAIL_API_URL.format(video_id, self._DEVICE_ID),
            video_id, headers={'content-type': 'application/json'},
            data=json.dumps(data).encode('utf-8'))
        asset_data = json_data['assetDetails']
        show_data = json_data.get('showDetails', {})

        # Tolerate a missing/null business_type instead of crashing on it.
        if 'premium' in (asset_data.get('business_type') or ''):
            raise ExtractorError('Premium content is DRM protected.', expected=True)

        hls_url = asset_data.get('hls_url')
        formats, m3u8_subs = [], {}
        if hls_url:
            formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', fatal=False)
        else:
            # With metadata_available=True this may only warn and return
            # (e.g. when missing formats are being ignored), so execution can
            # continue past this point without an HLS URL.
            self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)
        self._sort_formats(formats)

        subtitles = {}
        # `or []` also covers the key being present with a null value.
        for sub in asset_data.get('subtitle_url') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('language', 'en'), []).append({
                'url': self._proto_relative_url(sub_url),
            })
        subtitles = self._merge_subtitles(subtitles, m3u8_subs)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': asset_data['title'],
            'formats': formats,
            'subtitles': subtitles,
            'duration': int_or_none(asset_data.get('duration')),
            'description': str_or_none(asset_data.get('description')),
            'alt_title': str_or_none(asset_data.get('original_title')),
            'uploader': str_or_none(asset_data.get('content_owner')),
            'age_limit': parse_age_limit(asset_data.get('age_rating')),
            'release_date': unified_strdate(asset_data.get('release_date')),
            'timestamp': unified_timestamp(asset_data.get('release_date')),
            'thumbnail': url_or_none(asset_data.get('image_url')),
            'series': str_or_none(asset_data.get('tvshow_name')),
            # NOTE(review): 'seasons' is indexed with [0] for season_number
            # below but with 'title' here, so one of the two lookups can never
            # succeed for a given payload shape - confirm the intended shape.
            'season': try_get(show_data, lambda x: x['seasons']['title'], str),
            'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])),
            'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])),
            'tags': try_get(asset_data, lambda x: x['tags'], list)
        }
169
170
class Zee5SeriesIE(InfoExtractor):
    """Extractor for a ZEE5 show: yields every episode as a playlist entry.

    Accepts ``zee5:series:<id>`` pseudo-URLs as well as zee5.com show pages
    (optionally ending in ``/episodes``); see ``_VALID_URL``.
    """

    IE_NAME = 'zee5:series'
    _VALID_URL = r'''(?x)
                     (?:
                        zee5:series:|
                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                        (?:tv-shows|web-series|kids|zee5originals)(?:/[^#/?]+){2}/
                     )
                     (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
                     '''
    _TESTS = [{
        'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899',
        'playlist_mincount': 156,
        'info_dict': {
            'id': '0-6-1899',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199',
        'playlist_mincount': 1500,
        'info_dict': {
            'id': '0-6-199',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/agent-raghav-crime-branch/0-6-965',
        'playlist_mincount': 24,
        'info_dict': {
            'id': '0-6-965',
        },
    }, {
        'url': 'https://www.zee5.com/ta/tv-shows/details/nagabhairavi/0-6-3201',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '0-6-3201',
        },
    }, {
        'url': 'https://www.zee5.com/global/hi/tv-shows/details/khwaabon-ki-zamin-par/0-6-270',
        'playlist_mincount': 150,
        'info_dict': {
            'id': '0-6-270',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408',
        'only_matching': True,
    }]

    def _entries(self, show_id):
        """Yield a ``url_result`` for each episode of every season of a show.

        Episode listings are fetched per season, following the
        ``next_episode_api`` link of each response until it is absent or not
        a valid URL. Seasons and episodes without an id are skipped.
        """
        access_token_request = self._download_json(
            'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
            show_id, note='Downloading access token')
        headers = {
            'X-Access-Token': access_token_request['token'],
            'Referer': 'https://www.zee5.com/',
        }
        show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN'

        # Counts pages across all seasons; used only for the progress note.
        page_num = 0
        show_json = self._download_json(show_url, video_id=show_id, headers=headers)
        for season in show_json.get('seasons') or []:
            season_id = try_get(season, lambda x: x['id'], compat_str)
            if not season_id:
                # Without an id the listing URL would be built with
                # "season_id=None"; skip the season instead.
                continue
            next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'
            while next_url:
                page_num += 1
                episodes_json = self._download_json(
                    next_url, video_id=show_id, headers=headers,
                    note='Downloading JSON metadata page %d' % page_num)
                for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
                    video_id = episode.get('id')
                    if not video_id:
                        # Avoid emitting a bogus "zee5:None" entry.
                        continue
                    yield self.url_result(
                        'zee5:%s' % video_id,
                        ie=Zee5IE.ie_key(), video_id=video_id)
                next_url = url_or_none(episodes_json.get('next_episode_api'))

    def _real_extract(self, url):
        """Return a playlist of all episodes for the show identified by ``url``."""
        show_id = self._match_id(url)
        return self.playlist_result(self._entries(show_id), playlist_id=show_id)
246 def _real_extract(self, url):
247 show_id = self._match_id(url)
248 return self.playlist_result(self._entries(show_id), playlist_id=show_id)