]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/zee5.py
[extractor/youtube] Fix continuation loop with no comments (#7148)
[yt-dlp.git] / yt_dlp / extractor / zee5.py
1 import json
2 import random
3 import string
4
5 from .common import InfoExtractor
6 from ..compat import compat_str
7 from ..utils import (
8 ExtractorError,
9 int_or_none,
10 parse_age_limit,
11 str_or_none,
12 try_get,
13 unified_strdate,
14 unified_timestamp,
15 url_or_none,
16 )
17
18
class Zee5IE(InfoExtractor):
    """Extractor for a single ZEE5 video (movie, episode, news clip, music video...).

    Matches zee5.com watch URLs as well as the internal ``zee5:<id>`` form.
    """

    _VALID_URL = r'''(?x)
                     (?:
                        zee5:|
                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                        (?:
                            (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3}
                            |(?:movies|kids|videos|news|music-videos)/(?!kids-shows)[^#/?]+
                        )/(?P<display_id>[^#/?]+)/
                     )
                     (?P<id>[^#/?]+)/?(?:$|[?#])
                     '''
    _TESTS = [{
        'url': 'https://www.zee5.com/movies/details/adavari-matalaku-ardhale-verule/0-0-movie_1143162669',
        'info_dict': {
            'id': '0-0-movie_1143162669',
            'ext': 'mp4',
            'display_id': 'adavari-matalaku-ardhale-verule',
            'title': 'Adavari Matalaku Ardhale Verule',
            'duration': 9360,
            'description': compat_str,
            'alt_title': 'Adavari Matalaku Ardhale Verule',
            'uploader': 'Zee Entertainment Enterprises Ltd',
            'release_date': '20070427',
            'upload_date': '20070427',
            'timestamp': 1177632000,
            'thumbnail': r're:^https?://.*\.jpg$',
            'episode_number': 0,
            'episode': 'Episode 0',
            'tags': list
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899/yoga-se-hoga-bandbudh-aur-budbak/0-1-239839',
        'info_dict': {
            'id': '0-1-239839',
            'ext': 'mp4',
            'display_id': 'yoga-se-hoga-bandbudh-aur-budbak',
            'title': 'Yoga Se Hoga-Bandbudh aur Budbak',
            'duration': 659,
            'description': compat_str,
            'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak',
            'uploader': 'Zee Entertainment Enterprises Ltd',
            'release_date': '20150101',
            'upload_date': '20150101',
            'timestamp': 1420070400,
            'thumbnail': r're:^https?://.*\.jpg$',
            'series': 'Bandbudh Aur Budbak',
            'season_number': 1,
            'episode_number': 1,
            'episode': 'Episode 1',
            'season': 'Season 1',
            'tags': list,
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/kids/kids-movies/maya-bommalu/0-0-movie_1040370005',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/news/details/jana-sena-chief-pawan-kalyan-shows-slippers-to-ysrcp-leaders/0-0-newsauto_6ettj4242oo0',
        'only_matching': True
    }, {
        'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
        'only_matching': True
    }]
    # Playback-details endpoint; the two placeholders are (content id, device id).
    _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
    # Built once when the class body executes: 20 random alphanumerics,
    # right-padded with '0' to 32 characters. Also sent as the guest token.
    _DEVICE_ID = ''.join(random.choices(string.ascii_letters + string.digits, k=20)).ljust(32, '0')
    # Set by _perform_login(); stays None for anonymous (guest) requests.
    _USER_TOKEN = None
    _LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
    _NETRC_MACHINE = 'zee5'
    _GEO_COUNTRIES = ['IN']

    def _perform_login(self, username, password):
        """Obtain a user token, either through an OTP exchange or from a supplied token.

        Two credential forms are accepted:
          * ``username`` is a 10-digit number: an OTP is requested for
            ``91<username>``, read back through ``_get_tfa_info`` and verified.
          * ``username`` is ``token`` (case-insensitive) and ``password`` is longer
            than 1198 characters: ``password`` is used as the user token as-is.

        Raises ExtractorError (expected) when the OTP cannot be sent or verified,
        or when the credentials match neither form.
        """
        if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
            self.report_login()
            otp_request_json = self._download_json(
                f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}',
                None, note='Sending OTP')
            if otp_request_json['code'] == 0:
                self.to_screen(otp_request_json['message'])
            else:
                raise ExtractorError(otp_request_json['message'], expected=True)
            otp_code = self._get_tfa_info('OTP')
            otp_verify_json = self._download_json(
                f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web',
                None, note='Verifying OTP', fatal=False)
            if not otp_verify_json:
                raise ExtractorError('Unable to verify OTP.', expected=True)
            self._USER_TOKEN = otp_verify_json.get('token')
            if not self._USER_TOKEN:
                # Report the verification response, not the earlier "send OTP"
                # response whose message was already shown as a success notice.
                # NOTE(review): assumes the verify response carries 'message' like
                # the send response does - falls back to a generic text otherwise.
                raise ExtractorError(
                    otp_verify_json.get('message') or 'Unable to verify OTP.', expected=True)
        elif username.lower() == 'token' and password and len(password) > 1198:
            # `password` is checked for truthiness first so that a missing
            # password yields the login hint below instead of a TypeError.
            self._USER_TOKEN = password
        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

    def _real_extract(self, url):
        """Fetch playback details for the video at ``url`` and build its info dict.

        Raises ExtractorError (expected) for content whose business type contains
        'premium'. When no HLS URL is returned, login is requested via
        raise_login_required and, if that call returns, the result carries
        metadata with an empty format list.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        access_token_request = self._download_json(
            'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
            video_id, note='Downloading access token')
        # These values are sent in the JSON request body, not as HTTP headers.
        data = {
            'x-access-token': access_token_request['token']
        }
        if self._USER_TOKEN:
            data['Authorization'] = f'bearer {self._USER_TOKEN}'
        else:
            data['X-Z5-Guest-Token'] = self._DEVICE_ID

        json_data = self._download_json(
            self._DETAIL_API_URL.format(video_id, self._DEVICE_ID),
            video_id, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
        asset_data = json_data['assetDetails']
        show_data = json_data.get('showDetails', {})
        if 'premium' in (asset_data.get('business_type') or ''):
            raise ExtractorError('Premium content is DRM protected.', expected=True)

        hls_url = asset_data.get('hls_url')
        formats, m3u8_subs = [], {}
        if hls_url:
            formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4', fatal=False)
        else:
            # Called with metadata_available=True, so this may return rather than
            # raise (presumably when missing formats are tolerated - confirm against
            # InfoExtractor). Indexing asset_data['hls_url'] afterwards would then
            # fail with KeyError, hence the explicit empty defaults above.
            self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)

        subtitles = {}
        # `or []` also covers an explicit null for 'subtitle_url'.
        for sub in asset_data.get('subtitle_url') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('language', 'en'), []).append({
                'url': self._proto_relative_url(sub_url),
            })
        subtitles = self._merge_subtitles(subtitles, m3u8_subs)
        return {
            'id': video_id,
            'display_id': display_id,
            'title': asset_data['title'],
            'formats': formats,
            'subtitles': subtitles,
            'duration': int_or_none(asset_data.get('duration')),
            'description': str_or_none(asset_data.get('description')),
            'alt_title': str_or_none(asset_data.get('original_title')),
            'uploader': str_or_none(asset_data.get('content_owner')),
            'age_limit': parse_age_limit(asset_data.get('age_rating')),
            'release_date': unified_strdate(asset_data.get('release_date')),
            'timestamp': unified_timestamp(asset_data.get('release_date')),
            'thumbnail': url_or_none(asset_data.get('image_url')),
            'series': str_or_none(asset_data.get('tvshow_name')),
            # 'seasons' is indexed as a list for season_number below, so the title
            # has to be read from the same first element; indexing the list with
            # the string key 'title' could never succeed.
            'season': try_get(show_data, lambda x: x['seasons'][0]['title'], str),
            'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])),
            'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])),
            'tags': try_get(asset_data, lambda x: x['tags'], list)
        }
179
180
class Zee5SeriesIE(InfoExtractor):
    """Extractor for a ZEE5 show page; yields every episode as a ``zee5:<id>`` entry."""

    IE_NAME = 'zee5:series'
    _VALID_URL = r'''(?x)
                     (?:
                        zee5:series:|
                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                        (?:tv-shows|web-series|kids|zee5originals)/(?!kids-movies)(?:[^#/?]+/){2}
                     )
                     (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
                     '''
    _TESTS = [{
        'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899',
        'playlist_mincount': 156,
        'info_dict': {
            'id': '0-6-1899',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199',
        'playlist_mincount': 1500,
        'info_dict': {
            'id': '0-6-199',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/agent-raghav-crime-branch/0-6-965',
        'playlist_mincount': 24,
        'info_dict': {
            'id': '0-6-965',
        },
    }, {
        'url': 'https://www.zee5.com/ta/tv-shows/details/nagabhairavi/0-6-3201',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '0-6-3201',
        },
    }, {
        'url': 'https://www.zee5.com/global/hi/tv-shows/details/khwaabon-ki-zamin-par/0-6-270',
        'playlist_mincount': 150,
        'info_dict': {
            'id': '0-6-270',
        },
    }, {
        'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
        'only_matching': True,
    }, {
        'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408',
        'only_matching': True,
    }]

    def _entries(self, show_id):
        """Yield a url_result for every episode of every season of ``show_id``.

        Seasons are taken from the show document; each season's episodes are
        paged through by following the 'next_episode_api' URL of every page
        until it is absent or not a valid URL.
        """
        access_token_request = self._download_json(
            'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
            show_id, note='Downloading access token')
        headers = {
            'X-Access-Token': access_token_request['token'],
            'Referer': 'https://www.zee5.com/',
        }
        show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN'

        # Counts pages across all seasons; only used in the progress note.
        page_num = 0
        show_json = self._download_json(show_url, video_id=show_id, headers=headers)
        for season in show_json.get('seasons') or []:
            season_id = try_get(season, lambda x: x['id'], compat_str)
            if not season_id:
                # Without a string id the request below would be sent with
                # 'season_id=None'; skip the season instead.
                continue
            next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'
            while next_url:
                page_num += 1
                episodes_json = self._download_json(
                    next_url, video_id=show_id, headers=headers,
                    note='Downloading JSON metadata page %d' % page_num)
                for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
                    video_id = episode.get('id')
                    if not video_id:
                        # An entry without an id would become the bogus URL 'zee5:None'.
                        continue
                    yield self.url_result(
                        'zee5:%s' % video_id,
                        ie=Zee5IE.ie_key(), video_id=video_id)
                next_url = url_or_none(episodes_json.get('next_episode_api'))

    def _real_extract(self, url):
        """Return a playlist whose id is the show id and whose entries come from _entries."""
        show_id = self._match_id(url)
        return self.playlist_result(self._entries(show_id), playlist_id=show_id)
256 def _real_extract(self, url):
257 show_id = self._match_id(url)
258 return self.playlist_result(self._entries(show_id), playlist_id=show_id)