]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/amazonminitv.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / amazonminitv.py
CommitLineData
48652590 1import json
2
3from .common import InfoExtractor
4from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
5
6
class AmazonMiniTVBaseIE(InfoExtractor):
    """Shared guest-session setup and API access for the Amazon MiniTV extractors."""

    def _real_initialize(self):
        """Load the MiniTV landing page and remember its guest ``session-id`` cookie.

        The value is stored on ``AmazonMiniTVBaseIE`` itself (not on the
        instance), so every subclass reads the same ``session_id``.

        Raises:
            ExtractorError: if no ``session-id`` cookie is present for amazon.in
                after the page has been fetched.
        """
        self._download_webpage(
            'https://www.amazon.in/minitv', None,
            note='Fetching guest session cookies')
        # Look the cookie up with .get() so a missing cookie is reported as an
        # extraction error instead of crashing with a bare KeyError
        session_cookie = self._get_cookies('https://www.amazon.in').get('session-id')
        if session_cookie is None:
            raise ExtractorError('Unable to obtain a guest "session-id" cookie from amazon.in')
        AmazonMiniTVBaseIE.session_id = session_cookie.value

    def _call_api(self, asin, data=None, note=None):
        """Query the MiniTV web API for ``asin`` and return the decoded JSON.

        Args:
            asin: Content identifier; used as the download's video id and, for
                requests without ``data``, as the ``contentId`` query parameter.
            data: Optional GraphQL request dict with ``operationName``,
                ``variables`` and ``query`` keys. When given, the request goes
                to the ``graphql`` endpoint with a JSON body; otherwise it goes
                to the ``prs`` endpoint with query parameters only.
            note: Progress message passed through to ``_download_json``.

        Returns:
            The whole response when ``data`` is falsy, otherwise
            ``resp['data'][data['operationName']]``.

        Raises:
            ExtractorError: if the response carries a non-empty ``errors`` list.
        """
        device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
        payload = None
        if data:
            # Build a fresh payload instead of updating data['variables'] in
            # place, so the dict passed in by the caller is left untouched.
            # Key order (and therefore the serialized JSON) is unchanged.
            payload = {
                **data,
                'variables': {
                    **data['variables'],
                    'contentType': 'VOD',
                    'sessionIdToken': self.session_id,
                    **device,
                },
            }

        resp = self._download_json(
            f'https://www.amazon.in/minitv/api/web/{"graphql" if payload else "prs"}',
            asin, note=note, headers={
                'Content-Type': 'application/json',
                'currentpageurl': '/',
                'currentplatform': 'dWeb',
            }, data=json.dumps(payload).encode() if payload else None,
            query=None if payload else {
                'deviceType': 'A1WMMUXPCUJL4N',
                'contentId': asin,
                **device,
            })

        if resp.get('errors'):
            raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
        elif not payload:
            return resp
        return resp['data'][payload['operationName']]
41
42
class AmazonMiniTVIE(AmazonMiniTVBaseIE):
    """Extractor for a single Amazon MiniTV title (episode or movie)."""

    _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
    _TESTS = [{
        'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
        'info_dict': {
            'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
            'ext': 'mp4',
            'title': 'May I Kiss You?',
            'language': 'Hindi',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'description': 'md5:a549bfc747973e04feb707833474e59d',
            'release_timestamp': 1644710400,
            'release_date': '20220213',
            'duration': 846,
            'chapters': 'count:2',
            'series': 'Couple Goals',
            'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
            'season': 'Season 3',
            'season_number': 3,
            'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
            'episode': 'May I Kiss You?',
            'episode_number': 2,
            'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
        },
    }, {
        'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
        'info_dict': {
            'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
            'ext': 'mp4',
            'title': 'Jahaan',
            'language': 'Hindi',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'description': 'md5:05eb765a77bf703f322f120ec6867339',
            'release_timestamp': 1647475200,
            'release_date': '20220317',
            'duration': 783,
            'chapters': [],
        },
    }, {
        'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
        'only_matching': True,
    }, {
        'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
        'only_matching': True,
    }, {
        'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
        'only_matching': True,
    }]

    # GraphQL document sent as the 'query' of the 'content' operation
    _GRAPHQL_QUERY_CONTENT = '''
query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
  content(
    applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
    contentId: $contentId
    contentType: $contentType
  ) {
    contentId
    name
    ... on Episode {
      contentId
      vodType
      name
      images
      description {
        synopsis
        contentLengthInSeconds
      }
      publicReleaseDateUTC
      audioTracks
      seasonId
      seriesId
      seriesName
      seasonNumber
      episodeNumber
      timecode {
        endCreditsTime
      }
    }
    ... on MovieContent {
      contentId
      vodType
      name
      description {
        synopsis
        contentLengthInSeconds
      }
      images
      publicReleaseDateUTC
      audioTracks
    }
  }
}'''

    def _real_extract(self, url):
        """Build the info dict for the title identified by ``url``.

        Two API calls are made: one for the playback assets (HLS/DASH
        manifests) and one GraphQL ``content`` query for the metadata.
        Series/season/episode fields are taken from the metadata as-is;
        ``episode`` and ``episode_id`` are only set when ``vodType`` is
        ``'EPISODE'``.
        """
        asin = f'amzn1.dv.gti.{self._match_id(url)}'
        prs = self._call_api(asin, note='Downloading playback info')

        # A response without a usable asset map means "no formats", not a
        # KeyError/AttributeError
        playback_assets = prs.get('playbackAssets')
        if not isinstance(playback_assets, dict):
            playback_assets = {}

        formats, subtitles = [], {}
        for type_, asset in playback_assets.items():
            manifest_url = traverse_obj(asset, 'manifestUrl')
            if not manifest_url:
                continue
            if type_ == 'hls':
                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    manifest_url, asin, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id=type_, fatal=False)
                formats.extend(m3u8_fmts)
                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
            elif type_ == 'dash':
                mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
                    manifest_url, asin, mpd_id=type_, fatal=False)
                formats.extend(mpd_fmts)
                subtitles = self._merge_subtitles(subtitles, mpd_subs)
            else:
                self.report_warning(f'Unknown asset type: {type_}')

        title_info = self._call_api(
            asin, note='Downloading title info', data={
                'operationName': 'content',
                'variables': {'contentId': asin},
                'query': self._GRAPHQL_QUERY_CONTENT,
            })
        # endCreditsTime is divided by 1000 before use as a chapter start time
        credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
        is_episode = title_info.get('vodType') == 'EPISODE'

        return {
            'id': asin,
            'title': title_info.get('name'),
            'formats': formats,
            'subtitles': subtitles,
            'language': traverse_obj(title_info, ('audioTracks', 0)),
            # Distinct names so the comprehension does not shadow `url`/`type_`
            'thumbnails': [{
                'id': thumb_id,
                'url': thumb_url,
            } for thumb_id, thumb_url in (title_info.get('images') or {}).items()],
            'description': traverse_obj(title_info, ('description', 'synopsis')),
            'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
            'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
            'chapters': [{
                'start_time': credits_time,
                'title': 'End Credits',
            }] if credits_time else [],
            'series': title_info.get('seriesName'),
            'series_id': title_info.get('seriesId'),
            'season_number': title_info.get('seasonNumber'),
            'season_id': title_info.get('seasonId'),
            'episode': title_info.get('name') if is_episode else None,
            'episode_number': title_info.get('episodeNumber'),
            'episode_id': asin if is_episode else None,
        }
192
193
class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
    """Playlist extractor for one MiniTV season, addressed as ``amazonminitv:season:<id>``."""

    IE_NAME = 'amazonminitv:season'
    _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
    # The advertised prefix must be the one _VALID_URL actually matches
    IE_DESC = 'Amazon MiniTV Season, "amazonminitv:season:" prefix'
    _TESTS = [{
        'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
        'playlist_mincount': 6,
        'info_dict': {
            'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
        },
    }, {
        'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
        'only_matching': True,
    }]

    # GraphQL document sent as the 'query' of the 'getEpisodes' operation
    _GRAPHQL_QUERY = '''
query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
  getEpisodes(
    applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
    episodeOrSeasonId: $episodeOrSeasonId
  ) {
    episodes {
      ... on Episode {
        contentId
        name
        images
        seriesName
        seasonId
        seriesId
        seasonNumber
        episodeNumber
        description {
          synopsis
          contentLengthInSeconds
        }
        publicReleaseDateUTC
      }
    }
  }
}
'''

    def _entries(self, asin):
        """Yield one ``amazonminitv:<contentId>`` URL result per episode of the season ``asin``."""
        season_info = self._call_api(
            asin, note='Downloading season info', data={
                'operationName': 'getEpisodes',
                'variables': {'episodeOrSeasonId': asin},
                'query': self._GRAPHQL_QUERY,
            })

        for episode in season_info['episodes']:
            yield self.url_result(
                f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])

    def _real_extract(self, url):
        """Return a playlist of the season's episodes, using the full ASIN as the playlist id."""
        asin = f'amzn1.dv.gti.{self._match_id(url)}'
        return self.playlist_result(self._entries(asin), asin)
48652590 251
252
class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
    """Playlist extractor for a whole MiniTV series, addressed as ``amazonminitv:series:<id>``."""

    IE_NAME = 'amazonminitv:series'
    _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
    # The advertised prefix must be the one _VALID_URL actually matches
    IE_DESC = 'Amazon MiniTV Series, "amazonminitv:series:" prefix'
    _TESTS = [{
        'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
        },
    }, {
        'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
        'only_matching': True,
    }]

    # GraphQL document sent as the 'query' of the 'getSeasons' operation
    _GRAPHQL_QUERY = '''
query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
  getSeasons(
    applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
    episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
  ) {
    seasons {
      seasonId
    }
  }
}
'''

    def _entries(self, asin):
        """Yield one ``amazonminitv:season:<seasonId>`` URL result per season of the series ``asin``."""
        season_info = self._call_api(
            asin, note='Downloading series info', data={
                'operationName': 'getSeasons',
                'variables': {'episodeOrSeasonOrSeriesId': asin},
                'query': self._GRAPHQL_QUERY,
            })

        for season in season_info['seasons']:
            yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])

    def _real_extract(self, url):
        """Return a playlist of the series' seasons, using the full ASIN as the playlist id."""
        asin = f'amzn1.dv.gti.{self._match_id(url)}'
        return self.playlist_result(self._entries(asin), asin)
291
292 def _real_extract(self, url):
293 asin = f'amzn1.dv.gti.{self._match_id(url)}'
a9d069f5 294 return self.playlist_result(self._entries(asin), asin)