]>
Commit | Line | Data |
---|---|---|
2ef648d3 | 1 | import re |
3d2623a8 | 2 | import urllib.parse |
2ef648d3 JMF |
3 | |
4 | from .common import InfoExtractor | |
3d2623a8 | 5 | from ..networking.exceptions import HTTPError |
adccf336 | 6 | from ..utils import ( |
45b2ee6f | 7 | ExtractorError, |
cc2db878 | 8 | determine_ext, |
45b2ee6f | 9 | extract_attributes, |
adccf336 | 10 | int_or_none, |
45b2ee6f | 11 | merge_dicts, |
adccf336 | 12 | parse_iso8601, |
cc2db878 | 13 | strip_or_none, |
45b2ee6f | 14 | traverse_obj, |
15 | url_or_none, | |
16 | urljoin, | |
adccf336 | 17 | ) |
2ef648d3 | 18 | |
a95967f8 | 19 | |
class IGNBaseIE(InfoExtractor):
    """Shared helpers for the IGN extractors: API access and video metadata parsing."""

    def _call_api(self, slug):
        """Download and return the JSON document for ``slug`` from the IGN v3 API.

        The endpoint is built from ``self._PAGE_TYPE``, so a subclass that uses
        this helper has to define that attribute.
        """
        return self._download_json(
            f'http://apis.ign.com/{self._PAGE_TYPE}/v3/{self._PAGE_TYPE}s/slug/{slug}', slug)

    def _checked_call_api(self, slug):
        """Call the API for ``slug``, reporting HTTP 404 as an expected error.

        Raises:
            ExtractorError: with ``expected=True`` when the API answers 404;
                any other ExtractorError from the download is re-raised as is.
        """
        try:
            return self._call_api(slug)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                # Make sure the cause carries some args before wrapping it
                e.cause.args = e.cause.args or [
                    e.cause.response.url, e.cause.status, e.cause.reason]
                raise ExtractorError(
                    'Content not found: expired?', cause=e.cause,
                    expected=True)
            raise

    def _extract_video_info(self, video, fatal=True):
        """Build an info dict from an IGN ``video`` object.

        Args:
            video: dict describing the video; read keys are ``videoId``,
                ``refs``, ``assets``, ``system``, ``thumbnails``, ``tags``
                and ``metadata``.
            fatal: when true (the default) a missing ``videoId`` raises
                ``KeyError`` as before; when false, ``None`` is returned
                instead so callers can skip objects that are not videos.

        Returns:
            A dict with ``id``, ``title``, ``description``, ``timestamp``,
            ``duration``, ``thumbnails``, ``formats`` and ``tags``, or ``None``
            if ``fatal`` is false and no video id is available.
        """
        if fatal:
            video_id = video['videoId']
        else:
            # Non-fatal callers test the result for truthiness, so give them
            # None rather than a KeyError when the object carries no id
            video_id = video.get('videoId')
            if not video_id:
                return None

        formats = []
        refs = traverse_obj(video, 'refs', expected_type=dict) or {}

        m3u8_url = url_or_none(refs.get('m3uUrl'))
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        f4m_url = url_or_none(refs.get('f4mUrl'))
        if f4m_url:
            formats.extend(self._extract_f4m_formats(
                f4m_url, video_id, f4m_id='hds', fatal=False))

        # Direct asset URLs, one format per asset that has a usable URL
        for asset in (video.get('assets') or []):
            asset_url = url_or_none(asset.get('url'))
            if not asset_url:
                continue
            formats.append({
                'url': asset_url,
                # scaled by 1000 (presumably bps -> kbps; confirm against the API)
                'tbr': int_or_none(asset.get('bitrate'), 1000),
                'fps': int_or_none(asset.get('frame_rate')),
                'height': int_or_none(asset.get('height')),
                'width': int_or_none(asset.get('width')),
            })

        mezzanine_url = traverse_obj(
            video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
        if mezzanine_url:
            formats.append({
                'ext': determine_ext(mezzanine_url, 'mp4'),
                'format_id': 'mezzanine',
                'quality': 1,
                'url': mezzanine_url,
            })

        thumbnails = traverse_obj(
            video, ('thumbnails', ..., {'url': 'url'}), expected_type=url_or_none)
        tags = traverse_obj(
            video, ('tags', ..., 'displayName'),
            expected_type=lambda x: x.strip() or None)

        metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
        # First non-blank candidate among longTitle, title and name
        title = traverse_obj(
            metadata, 'longTitle', 'title', 'name',
            expected_type=lambda x: x.strip() or None)

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(metadata.get('description')),
            'timestamp': parse_iso8601(metadata.get('publishDate')),
            'duration': int_or_none(metadata.get('duration')),
            'thumbnails': thumbnails,
            'formats': formats,
            'tags': tags,
        }
2ef648d3 JMF |
97 | |
98 | ||
class IGNIE(IGNBaseIE):
    """
    Extractor for several IGN sites such as www.ign.com, es.ign.com and de.ign.com.
    Some it.ign.com videos are supported as well.
    """
    _VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
    _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
    # Either a single video path or a (possibly filtered) video listing
    _VALID_URL = (
        r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:{})'.format(
            '|'.join((
                _VIDEO_PATH_RE + r'(?:[/?&#]|$)',
                _PLAYLIST_PATH_RE,
            ))))
    IE_NAME = 'ign.com'
    _PAGE_TYPE = 'video'

    _TESTS = [{
        'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
        'md5': 'd2e1586d9987d40fad7867bf96a018ea',
        'info_dict': {
            'id': '8f862beef863986b2785559b9e1aa599',
            'ext': 'mp4',
            'title': 'The Last of Us Review',
            'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            'timestamp': 1370440800,
            'upload_date': '20130605',
            'tags': 'count:9',
            'display_id': 'the-last-of-us-review',
            'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2014/03/26/lastofusreviewmimig2.jpg',
            'duration': 440,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }, {
        'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
        'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
        'info_dict': {
            'id': 'ee10d774b508c9b8ec07e763b9125b91',
            'ext': 'mp4',
            'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
            'description': 'md5:817a20299de610bd56f13175386da6fa',
            'timestamp': 1420571160,
            'upload_date': '20150106',
            'tags': 'count:4',
        },
        'skip': '404 Not Found',
    }, {
        'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return an iterable of absolute video URLs linked from the first content-feed grid of ``webpage``."""
        first_grid = re.search(
            r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
            webpage)
        grid_html = first_grid.group(1) if first_grid else ''
        links = re.finditer(
            rf'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos{cls._VIDEO_PATH_RE})\1''',
            grid_html)
        # Resolve each link against the page URL, dropping any that fail to join
        return filter(
            None, (urljoin(url, link.group('path')) for link in links))

    def _real_extract(self, url):
        """Dispatch to single-video or playlist extraction depending on which URL form matched."""
        mobj = self._match_valid_url(url)
        display_id, filt = mobj.group('id', 'filt')
        if not display_id:
            return self._extract_playlist(url, filt or 'all')
        return self._extract_video(url, display_id)

    def _extract_playlist(self, url, display_id):
        """Build a playlist from the video links found on the listing page at ``url``."""
        webpage = self._download_webpage(url, display_id)
        entries = (
            self.url_result(video_url, self.ie_key())
            for video_url in self._extract_embed_urls(url, webpage))
        return self.playlist_result(entries, playlist_id=display_id)

    def _extract_video(self, url, display_id):
        """Fetch the video identified by slug ``display_id`` from the API and return its info dict."""
        info = self._extract_video_info(self._checked_call_api(display_id))
        return merge_dicts({'display_id': display_id}, info)
179 | ||
180 | ||
class IGNVideoIE(IGNBaseIE):
    """Extractor for numeric-id video/trailer pages on regional IGN sites, resolved via their ``/embed`` page."""

    _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
    _TESTS = [{
        'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
        'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
        'info_dict': {
            'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
            'ext': 'mp4',
            'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
            'description': 'Taking out assassination targets in Hitman has never been more stylish.',
            'timestamp': 1444665600,
            'upload_date': '20151012',
            'display_id': '112203',
            'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
            'duration': 298,
            'tags': 'count:13',
        },
        'expected_warnings': ['HTTP Error 400: Bad Request'],
    }, {
        'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
        'only_matching': True,
    }, {
        # Twitter embed
        'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
        'only_matching': True,
    }, {
        # Vimeo embed
        'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve ``url`` through its embed page.

        Returns a URL result when the embed page redirects elsewhere (to an
        ign.com video passed in the ``url`` query parameter, or to any other
        page), otherwise the info dict parsed from the embedded player settings.

        Raises:
            ExtractorError: if the embed page has no player and redirects back
                to ``url``, or if the player element carries no video data.
        """
        video_id = self._match_id(url)
        parsed_url = urllib.parse.urlparse(url)
        # Replace the last path component with 'embed'
        embed_url = urllib.parse.urlunparse(
            parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))

        webpage, urlh = self._download_webpage_handle(embed_url, video_id)
        new_url = urlh.url
        # A redirect target with a 'url' query parameter is handed over to IGNIE
        ign_url = urllib.parse.parse_qs(
            urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
        if ign_url:
            return self.url_result(ign_url, IGNIE.ie_key())
        video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
        if not video:
            if new_url == url:
                raise ExtractorError('Redirect loop: ' + url)
            return self.url_result(new_url)
        video = extract_attributes(video)
        settings = self._parse_json(video.get('data-settings') or '{}', video_id)
        # A missing or empty 'video' entry used to surface as a bare KeyError;
        # report it explicitly instead
        video_data = traverse_obj(settings, 'video', expected_type=dict)
        if not video_data:
            raise ExtractorError('Unable to extract video data')
        info = self._extract_video_info(video_data)

        return merge_dicts({
            'display_id': video_id,
        }, info)
adccf336 | 241 | |
adccf336 | 242 | |
class IGNArticleIE(IGNBaseIE):
    """Extractor for IGN article and feature pages, returned as playlists of the videos they contain."""

    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
    _PAGE_TYPE = 'article'
    _TESTS = [{
        'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
        'info_dict': {
            'id': '72113',
            'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': '5ebbd138523268b93c9141af17bec937',
                    'ext': 'mp4',
                    'title': 'Grand Theft Auto V Video Review',
                    'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    'timestamp': 1379339880,
                    'upload_date': '20130916',
                    'tags': 'count:12',
                    'thumbnail': 'https://assets1.ignimgs.com/thumbs/userUploaded/2021/8/16/gta-v-heistsjpg-e94705-1629138553533.jpeg',
                    'display_id': 'grand-theft-auto-v-video-review',
                    'duration': 501,
                },
            },
            {
                'info_dict': {
                    'id': '638672ee848ae4ff108df2a296418ee2',
                    'ext': 'mp4',
                    'title': 'GTA 5 In Slow Motion',
                    'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    'timestamp': 1386878820,
                    'upload_date': '20131212',
                    'duration': 202,
                    'tags': 'count:25',
                    'display_id': 'gta-5-in-slow-motion',
                    'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2013/11/03/GTA-SLO-MO-1.jpg',
                },
            },
        ],
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Backend fetch failed'],
    }, {
        'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
        'info_dict': {
            'id': '53ee806780a81ec46e0790f8',
            'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
        },
        'playlist_count': 1,
        'expected_warnings': ['Backend fetch failed'],
    }, {
        # videoId pattern
        'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
        'only_matching': True,
    }, {
        # IMDB embed
        'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
        'only_matching': True,
    }, {
        # Facebook embed
        'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
        'only_matching': True,
    }, {
        # Brightcove embed
        'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
        'only_matching': True,
    }]

    def _checked_call_api(self, slug):
        """Call the article API for ``slug``.

        HTTP 404 is raised as an expected "not found" error; HTTP 503 is only
        reported as a warning and ``None`` is returned. Every other failure
        is re-raised unchanged.
        """
        try:
            return self._call_api(slug)
        except ExtractorError as err:
            http_error = err.cause
            if not isinstance(http_error, HTTPError):
                raise
            # Make sure the cause carries some args before it is reported
            http_error.args = http_error.args or [
                http_error.response.url, http_error.status, http_error.reason]
            if http_error.status == 404:
                raise ExtractorError(
                    'Content not found: expired?', cause=http_error,
                    expected=True)
            if http_error.status != 503:
                raise
            self.report_warning(str(http_error))
            return None

    def _ign_api_entries(self, article):
        """Yield URL results for the videos referenced by an API ``article`` document."""
        related_url = traverse_obj(
            article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
            expected_type=url_or_none)
        if related_url:
            yield self.url_result(related_url, IGNIE.ie_key())
        for content in (article.get('content') or []):
            embedded = re.findall(
                r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content)
            for candidate in embedded:
                if url_or_none(candidate):
                    yield self.url_result(candidate)

    def _ign_flash_entries(self, webpage):
        """Yield URL results taken from the flashvars of ``ign-videoplayer`` objects in ``webpage``."""
        for player in re.finditer(
                r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
                webpage):
            param_tag = self._search_regex(
                r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
                player.group('params'), 'flashvars', default='')
            flashvars = urllib.parse.parse_qs(
                extract_attributes(param_tag).get('value') or '')
            target = url_or_none((flashvars.get('url') or [None])[-1])
            if target:
                yield self.url_result(target)

    def _ign_nextjs_entries(self, nextjs_data, display_id):
        """Yield info dicts for the video players listed in the page's Next.js apollo state."""
        state_path = ('props', 'apolloState')
        player_refs = traverse_obj(
            nextjs_data,
            (*state_path, 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref'))
        for ref in player_refs:
            # skip promo links (which may not always be served, eg GH CI servers)
            promo = traverse_obj(
                nextjs_data,
                (*state_path, ref.replace('PlayerProps', 'ModernContent')),
                expected_type=dict)
            if promo:
                continue
            video = traverse_obj(nextjs_data, (*state_path, ref), expected_type=dict) or {}
            info = self._extract_video_info(video, fatal=False)
            if not info:
                continue
            yield merge_dicts({'display_id': display_id}, info)

    def _real_extract(self, url):
        """Return a playlist of the videos in the article at ``url``.

        The API document is used when it is available; otherwise the web page
        is scraped, first for legacy flash players and then for Next.js data.
        """
        display_id = self._match_id(url)
        article = self._checked_call_api(display_id)

        if article:
            # obsolete ?
            return self.playlist_result(
                self._ign_api_entries(article), article.get('articleId'),
                traverse_obj(
                    article, ('metadata', 'headline'),
                    expected_type=lambda x: x.strip() or None))

        webpage = self._download_webpage(url, display_id)

        playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
        if playlist_id:
            entries = self._ign_flash_entries(webpage)
        else:
            playlist_id = self._search_regex(
                r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
                webpage, 'id', group='id', default=None)
            nextjs_data = self._search_nextjs_data(webpage, display_id)
            entries = self._ign_nextjs_entries(nextjs_data, display_id)

        # Drop the trailing " - IGN" site suffix from the page title
        page_title = re.sub(
            r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None
        return self.playlist_result(entries, playlist_id or display_id, page_title)