]>
Commit | Line | Data |
---|---|---|
2ef648d3 | 1 | import re |
3d2623a8 | 2 | import urllib.parse |
2ef648d3 JMF |
3 | |
4 | from .common import InfoExtractor | |
45b2ee6f | 5 | from ..compat import compat_parse_qs |
3d2623a8 | 6 | from ..networking.exceptions import HTTPError |
adccf336 | 7 | from ..utils import ( |
45b2ee6f | 8 | ExtractorError, |
cc2db878 | 9 | determine_ext, |
45b2ee6f | 10 | error_to_compat_str, |
11 | extract_attributes, | |
adccf336 | 12 | int_or_none, |
45b2ee6f | 13 | merge_dicts, |
adccf336 | 14 | parse_iso8601, |
cc2db878 | 15 | strip_or_none, |
45b2ee6f | 16 | traverse_obj, |
17 | url_or_none, | |
18 | urljoin, | |
adccf336 | 19 | ) |
2ef648d3 | 20 | |
a95967f8 | 21 | |
class IGNBaseIE(InfoExtractor):
    """Helpers shared by the IGN extractors: API access and video-dict parsing."""

    def _call_api(self, slug):
        """Download and return the JSON API document for ``slug``.

        The endpoint is built from ``self._PAGE_TYPE``, which is not defined
        on this class; a subclass that uses this method has to provide it.
        """
        return self._download_json(
            'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)

    def _checked_call_api(self, slug):
        """Call :meth:`_call_api`, reporting an HTTP 404 as an expected error.

        Raises:
            ExtractorError: with ``expected=True`` and the message
                'Content not found: expired?' when the API answers 404;
                any other ``ExtractorError`` is re-raised untouched.
        """
        try:
            return self._call_api(slug)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                # Fill in empty args so the wrapped cause carries URL, status and reason
                e.cause.args = e.cause.args or [
                    e.cause.response.url, e.cause.status, e.cause.reason]
                raise ExtractorError(
                    'Content not found: expired?', cause=e.cause,
                    expected=True)
            raise

    def _extract_video_info(self, video, fatal=True):
        """Build an info dict from an IGN ``video`` object.

        Formats are collected from ``refs.m3uUrl`` (HLS), ``refs.f4mUrl``
        (HDS), every entry of ``assets`` that has a valid URL, and
        ``system.mezzanineUrl``.

        Args:
            video: mapping describing the video.
            fatal: when true (the default) a missing ``videoId`` raises
                ``KeyError``; when false, ``None`` is returned instead so the
                caller can skip the entry.

        Returns:
            dict with ``id``, ``title``, ``description``, ``timestamp``,
            ``duration``, ``thumbnails``, ``formats`` and ``tags``; or
            ``None`` if ``fatal`` is false and no ``videoId`` is available.
        """
        if fatal:
            video_id = video['videoId']
        else:
            # Non-fatal callers test the result for truthiness, so signal
            # "nothing usable here" with None rather than raising KeyError.
            video_id = traverse_obj(video, 'videoId')
            if not video_id:
                return None

        formats = []
        refs = traverse_obj(video, 'refs', expected_type=dict) or {}

        m3u8_url = url_or_none(refs.get('m3uUrl'))
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        f4m_url = url_or_none(refs.get('f4mUrl'))
        if f4m_url:
            formats.extend(self._extract_f4m_formats(
                f4m_url, video_id, f4m_id='hds', fatal=False))

        for asset in (video.get('assets') or []):
            asset_url = url_or_none(asset.get('url'))
            if not asset_url:
                continue
            formats.append({
                'url': asset_url,
                'tbr': int_or_none(asset.get('bitrate'), 1000),
                'fps': int_or_none(asset.get('frame_rate')),
                'height': int_or_none(asset.get('height')),
                'width': int_or_none(asset.get('width')),
            })

        mezzanine_url = traverse_obj(
            video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
        if mezzanine_url:
            formats.append({
                'ext': determine_ext(mezzanine_url, 'mp4'),
                'format_id': 'mezzanine',
                'quality': 1,
                'url': mezzanine_url,
            })

        thumbnails = traverse_obj(
            video, ('thumbnails', ..., {'url': 'url'}), expected_type=url_or_none)
        tags = traverse_obj(
            video, ('tags', ..., 'displayName'),
            expected_type=lambda x: x.strip() or None)

        metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
        # First non-empty of longTitle / title / name, stripped
        title = traverse_obj(
            metadata, 'longTitle', 'title', 'name',
            expected_type=lambda x: x.strip() or None)

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(metadata.get('description')),
            'timestamp': parse_iso8601(metadata.get('publishDate')),
            'duration': int_or_none(metadata.get('duration')),
            'thumbnails': thumbnails,
            'formats': formats,
            'tags': tags,
        }
2ef648d3 JMF |
99 | |
100 | ||
class IGNIE(IGNBaseIE):
    """
    Extractor for several IGN sites (e.g. www.ign.com, es.ign.com, de.ign.com).
    Some it.ign.com videos are supported as well.

    A URL with a video slug is resolved through the API; a bare ``/videos``
    URL (optionally with a query filter) is treated as a playlist page.
    """
    _VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
    _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
    _VALID_URL = (
        r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
        % '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
    IE_NAME = 'ign.com'
    _PAGE_TYPE = 'video'

    _TESTS = [{
        'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
        'md5': 'd2e1586d9987d40fad7867bf96a018ea',
        'info_dict': {
            'id': '8f862beef863986b2785559b9e1aa599',
            'ext': 'mp4',
            'title': 'The Last of Us Review',
            'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            'timestamp': 1370440800,
            'upload_date': '20130605',
            'tags': 'count:9',
            'display_id': 'the-last-of-us-review',
            'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2014/03/26/lastofusreviewmimig2.jpg',
            'duration': 440,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }, {
        'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
        'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
        'info_dict': {
            'id': 'ee10d774b508c9b8ec07e763b9125b91',
            'ext': 'mp4',
            'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
            'description': 'md5:817a20299de610bd56f13175386da6fa',
            'timestamp': 1420571160,
            'upload_date': '20150106',
            'tags': 'count:4',
        },
        'skip': '404 Not Found',
    }, {
        'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return the video links found in the first content-feed-grid section.

        Links are resolved against ``url``; entries that cannot be resolved
        are filtered out. Nothing is returned when no such section exists.
        """
        sections = re.findall(
            r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
            webpage)
        # Only the first matching grid is scanned
        first_grid = sections[0] if sections else ''
        link_re = (
            r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
            % cls._VIDEO_PATH_RE)
        resolved = (
            urljoin(url, mobj.group('path'))
            for mobj in re.finditer(link_re, first_grid))
        return filter(None, resolved)

    def _real_extract(self, url):
        """Dispatch to single-video or playlist extraction based on the URL."""
        mobj = self._match_valid_url(url)
        display_id, filt = mobj.group('id', 'filt')
        if not display_id:
            # No slug matched: listing page, identified by its filter (or 'all')
            return self._extract_playlist(url, filt or 'all')
        return self._extract_video(url, display_id)

    def _extract_playlist(self, url, display_id):
        """Download the listing page and wrap each linked video as a playlist entry."""
        webpage = self._download_webpage(url, display_id)
        embed_urls = self._extract_embed_urls(url, webpage)
        entries = (
            self.url_result(embed_url, self.ie_key())
            for embed_url in embed_urls)
        return self.playlist_result(entries, playlist_id=display_id)

    def _extract_video(self, url, display_id):
        """Fetch the video record for ``display_id`` from the API and build its info dict."""
        api_video = self._checked_call_api(display_id)
        details = self._extract_video_info(api_video)
        return merge_dicts({'display_id': display_id}, details)
182 | ||
183 | ||
class IGNVideoIE(IGNBaseIE):
    """Extractor for numeric-id ``.../<id>/video/`` and ``.../<id>/trailer/`` pages on ign.com subdomains."""
    _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
    _TESTS = [{
        'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
        'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
        'info_dict': {
            'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
            'ext': 'mp4',
            'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
            'description': 'Taking out assassination targets in Hitman has never been more stylish.',
            'timestamp': 1444665600,
            'upload_date': '20151012',
            'display_id': '112203',
            'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
            'duration': 298,
            'tags': 'count:13',
        },
        'expected_warnings': ['HTTP Error 400: Bad Request'],
    }, {
        'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
        'only_matching': True,
    }, {
        # Twitter embed
        'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
        'only_matching': True,
    }, {
        # Vimeo embed
        'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page through its ``/embed`` variant.

        Depending on what the embed request yields, this either delegates to
        :class:`IGNIE` (a ``url`` query parameter is present in the final
        URL), hands the final URL back for generic handling (no
        ``data-video-id`` element found), or parses the element's
        ``data-settings`` JSON.

        Raises:
            ExtractorError: if no video element is found and the final URL
                equals the input URL ('Redirect loop').
        """
        video_id = self._match_id(url)

        # Swap the last path component for 'embed'
        url_parts = urllib.parse.urlparse(url)
        parent_path = url_parts.path.rsplit('/', 1)[0]
        embed_url = urllib.parse.urlunparse(
            url_parts._replace(path=parent_path + '/embed'))

        webpage, urlh = self._download_webpage_handle(embed_url, video_id)
        new_url = urlh.url

        final_query = compat_parse_qs(urllib.parse.urlparse(new_url).query)
        ign_url = final_query.get('url', [None])[-1]
        if ign_url:
            return self.url_result(ign_url, IGNIE.ie_key())

        video_tag = self._search_regex(
            r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage,
            'video element', fatal=False)
        if not video_tag:
            if new_url == url:
                raise ExtractorError('Redirect loop: ' + url)
            return self.url_result(new_url)

        tag_attrs = extract_attributes(video_tag)
        settings_json = tag_attrs.get('data-settings') or '{}'
        settings = self._parse_json(settings_json, video_id)
        details = self._extract_video_info(settings['video'])

        return merge_dicts({'display_id': video_id}, details)
adccf336 | 244 | |
adccf336 | 245 | |
class IGNArticleIE(IGNBaseIE):
    """Extractor for IGN article and feature pages, returned as playlists of their videos."""
    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
    _PAGE_TYPE = 'article'
    _TESTS = [{
        'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
        'info_dict': {
            'id': '72113',
            'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': '5ebbd138523268b93c9141af17bec937',
                    'ext': 'mp4',
                    'title': 'Grand Theft Auto V Video Review',
                    'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    'timestamp': 1379339880,
                    'upload_date': '20130916',
                    'tags': 'count:12',
                    'thumbnail': 'https://assets1.ignimgs.com/thumbs/userUploaded/2021/8/16/gta-v-heistsjpg-e94705-1629138553533.jpeg',
                    'display_id': 'grand-theft-auto-v-video-review',
                    'duration': 501,
                },
            },
            {
                'info_dict': {
                    'id': '638672ee848ae4ff108df2a296418ee2',
                    'ext': 'mp4',
                    'title': 'GTA 5 In Slow Motion',
                    'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    'timestamp': 1386878820,
                    'upload_date': '20131212',
                    'duration': 202,
                    'tags': 'count:25',
                    'display_id': 'gta-5-in-slow-motion',
                    'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2013/11/03/GTA-SLO-MO-1.jpg',
                },
            },
        ],
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Backend fetch failed'],
    }, {
        'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
        'info_dict': {
            'id': '53ee806780a81ec46e0790f8',
            'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
        },
        'playlist_count': 1,
        'expected_warnings': ['Backend fetch failed'],
    }, {
        # videoId pattern
        'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
        'only_matching': True,
    }, {
        # IMDB embed
        'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
        'only_matching': True,
    }, {
        # Facebook embed
        'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
        'only_matching': True,
    }, {
        # Brightcove embed
        'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
        'only_matching': True,
    }]

    def _checked_call_api(self, slug):
        """Call the article API, tolerating an HTTP 503.

        Returns:
            The API document, or ``None`` after emitting a warning when the
            API answers 503.

        Raises:
            ExtractorError: expected 'Content not found: expired?' error on
                404; every other failure is re-raised unchanged.
        """
        try:
            return self._call_api(slug)
        except ExtractorError as e:
            http_error = e.cause
            if not isinstance(http_error, HTTPError):
                raise
            # Fill in empty args with URL, status and reason
            http_error.args = http_error.args or [
                http_error.response.url, http_error.status, http_error.reason]
            if http_error.status == 404:
                raise ExtractorError(
                    'Content not found: expired?', cause=http_error,
                    expected=True)
            if http_error.status == 503:
                self.report_warning(error_to_compat_str(http_error))
                return None
            raise

    def _entries_from_api_article(self, article):
        """Yield url results for the media and embeds referenced by an API article document."""
        primary_media_url = traverse_obj(
            article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
            expected_type=url_or_none)
        if primary_media_url:
            yield self.url_result(primary_media_url, IGNIE.ie_key())
        embed_re = r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"'
        for chunk in (article.get('content') or []):
            for candidate in re.findall(embed_re, chunk):
                if url_or_none(candidate):
                    yield self.url_result(candidate)

    def _entries_from_flash_objects(self, webpage):
        """Yield url results taken from the flashvars of ``ign-videoplayer`` objects in the page."""
        object_re = r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object'''
        for mobj in re.finditer(object_re, webpage):
            param_tag = self._search_regex(
                r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
                mobj.group('params'), 'flashvars', default='')
            flashvars = compat_parse_qs(
                extract_attributes(param_tag).get('value') or '')
            player_url = url_or_none((flashvars.get('url') or [None])[-1])
            if player_url:
                yield self.url_result(player_url)

    def _entries_from_nextjs(self, nextjs_data, display_id):
        """Yield info dicts for the video players referenced from the page's Next.js data."""
        player_refs = traverse_obj(
            nextjs_data,
            ('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref'))
        for player_ref in player_refs:
            # skip promo links (which may not always be served, eg GH CI servers)
            promo_key = player_ref.replace('PlayerProps', 'ModernContent')
            if traverse_obj(nextjs_data,
                            ('props', 'apolloState', promo_key),
                            expected_type=dict):
                continue
            player = traverse_obj(
                nextjs_data, ('props', 'apolloState', player_ref),
                expected_type=dict) or {}
            details = self._extract_video_info(player, fatal=False)
            if details:
                yield merge_dicts({'display_id': display_id}, details)

    def _real_extract(self, url):
        """Build a playlist for the article, preferring the API and falling back to the webpage.

        When the API returns a document, its media/embeds are used. Otherwise
        the page itself is downloaded and either its ``ign-videoplayer``
        objects (if a ``dable:item_id`` meta tag is present) or its Next.js
        data are scanned for videos.
        """
        display_id = self._match_id(url)
        article = self._checked_call_api(display_id)

        if article:
            # API-based path; possibly obsolete
            return self.playlist_result(
                self._entries_from_api_article(article),
                article.get('articleId'),
                traverse_obj(
                    article, ('metadata', 'headline'),
                    expected_type=lambda x: x.strip() or None))

        webpage = self._download_webpage(url, display_id)

        playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
        if playlist_id:
            entries = self._entries_from_flash_objects(webpage)
        else:
            playlist_id = self._search_regex(
                r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
                webpage, 'id', group='id', default=None)
            nextjs_data = self._search_nextjs_data(webpage, display_id)
            entries = self._entries_from_nextjs(nextjs_data, display_id)

        # Drop a trailing " - IGN" from the page title; empty title becomes None
        playlist_title = re.sub(
            r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None
        return self.playlist_result(
            entries, playlist_id or display_id, playlist_title)