]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/ign.py
Update to ytdl-commit-2dd6c6e
[yt-dlp.git] / yt_dlp / extractor / ign.py
CommitLineData
import re
import urllib.error
import urllib.parse

from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import (
    ExtractorError,
    determine_ext,
    error_to_compat_str,
    extract_attributes,
    int_or_none,
    merge_dicts,
    parse_iso8601,
    strip_or_none,
    traverse_obj,
    url_or_none,
    urljoin,
)
2ef648d3 19
a95967f8 20
class IGNBaseIE(InfoExtractor):
    """Shared API access and video-metadata parsing for the IGN extractors.

    Subclasses that use the API helpers must define ``_PAGE_TYPE``; it is
    interpolated into the API URL by ``_call_api``.
    """

    def _call_api(self, slug):
        """Download and return the API JSON document for ``slug``."""
        return self._download_json(
            'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)

    def _checked_call_api(self, slug):
        """Call the API, reporting an HTTP 404 as an expected error.

        Raises:
            ExtractorError: with ``expected=True`` when the API answers 404;
                any other ExtractorError is re-raised unchanged.
        """
        try:
            return self._call_api(slug)
        except ExtractorError as e:
            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
                # Populate empty args so the chained cause carries URL, status and reason
                e.cause.args = e.cause.args or [
                    e.cause.geturl(), e.cause.getcode(), e.cause.reason]
                raise ExtractorError(
                    'Content not found: expired?', cause=e.cause,
                    expected=True)
            raise

    def _extract_video_info(self, video, fatal=True):
        """Build an info dict from an IGN ``video`` mapping.

        Args:
            video: mapping read for the keys ``videoId``, ``refs``, ``assets``,
                ``system``, ``thumbnails``, ``tags`` and ``metadata``.
            fatal: when true (default) a missing ``videoId`` raises KeyError;
                when false, ``None`` is returned instead so that callers can
                skip the entry.

        Returns:
            A dict with ``id``, ``title``, ``description``, ``timestamp``,
            ``duration``, ``thumbnails``, ``formats`` and ``tags``, or ``None``
            (only when ``fatal`` is false and no ``videoId`` is present).
        """
        if fatal:
            video_id = video['videoId']
        else:
            # Non-fatal callers test the result for truthiness, so signal
            # "nothing usable" with None rather than raising KeyError
            video_id = video.get('videoId')
            if not video_id:
                return None

        formats = []
        refs = traverse_obj(video, 'refs', expected_type=dict) or {}

        m3u8_url = url_or_none(refs.get('m3uUrl'))
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        f4m_url = url_or_none(refs.get('f4mUrl'))
        if f4m_url:
            formats.extend(self._extract_f4m_formats(
                f4m_url, video_id, f4m_id='hds', fatal=False))

        # Direct asset links; entries without a valid URL are ignored
        for asset in (video.get('assets') or []):
            asset_url = url_or_none(asset.get('url'))
            if not asset_url:
                continue
            formats.append({
                'url': asset_url,
                'tbr': int_or_none(asset.get('bitrate'), 1000),
                'fps': int_or_none(asset.get('frame_rate')),
                'height': int_or_none(asset.get('height')),
                'width': int_or_none(asset.get('width')),
            })

        mezzanine_url = traverse_obj(
            video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
        if mezzanine_url:
            formats.append({
                'ext': determine_ext(mezzanine_url, 'mp4'),
                'format_id': 'mezzanine',
                'quality': 1,
                'url': mezzanine_url,
            })

        thumbnails = traverse_obj(
            video, ('thumbnails', ..., {'url': 'url'}), expected_type=url_or_none)
        tags = traverse_obj(
            video, ('tags', ..., 'displayName'),
            expected_type=lambda x: x.strip() or None)

        metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
        # First non-empty candidate among longTitle, title and name
        title = traverse_obj(
            metadata, 'longTitle', 'title', 'name',
            expected_type=lambda x: x.strip() or None)

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(metadata.get('description')),
            'timestamp': parse_iso8601(metadata.get('publishDate')),
            'duration': int_or_none(metadata.get('duration')),
            'thumbnails': thumbnails,
            'formats': formats,
            'tags': tags,
        }
2ef648d3
JMF
98
99
class IGNIE(IGNBaseIE):
    """
    Extractor for several of the IGN sites, e.g. www.ign.com, es.ign.com, de.ign.com.
    Some it.ign.com videos are supported as well
    """
    _VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
    _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
    # Either a single video path or a (possibly filtered) video listing
    _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s|%s)' % (
        _VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)
    IE_NAME = 'ign.com'
    _PAGE_TYPE = 'video'

    _TESTS = [{
        'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
        'md5': 'd2e1586d9987d40fad7867bf96a018ea',
        'info_dict': {
            'id': '8f862beef863986b2785559b9e1aa599',
            'ext': 'mp4',
            'title': 'The Last of Us Review',
            'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            'timestamp': 1370440800,
            'upload_date': '20130605',
            'tags': 'count:9',
            'display_id': 'the-last-of-us-review',
            'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2014/03/26/lastofusreviewmimig2.jpg',
            'duration': 440,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }, {
        'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
        'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
        'info_dict': {
            'id': 'ee10d774b508c9b8ec07e763b9125b91',
            'ext': 'mp4',
            'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
            'description': 'md5:817a20299de610bd56f13175386da6fa',
            'timestamp': 1420571160,
            'upload_date': '20150106',
            'tags': 'count:4',
        },
        'skip': '404 Not Found',
    }, {
        'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return an iterator of absolute video URLs linked from the first
        ``content-feed-grid`` section of ``webpage`` (empty if there is none)."""
        first_grid = re.search(
            r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
            webpage)
        grid_html = first_grid.group(1) if first_grid else ''
        link_re = r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1''' % cls._VIDEO_PATH_RE
        absolute_urls = (
            urljoin(url, link.group('path'))
            for link in re.finditer(link_re, grid_html))
        # Drop links that could not be resolved against the page URL
        return (video_url for video_url in absolute_urls if video_url)

    def _real_extract(self, url):
        """Dispatch to single-video or listing extraction based on the URL."""
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        if not display_id:
            return self._extract_playlist(url, mobj.group('filt') or 'all')
        return self._extract_video(url, display_id)

    def _extract_playlist(self, url, display_id):
        """Return a playlist of the videos linked from the listing page at ``url``."""
        webpage = self._download_webpage(url, display_id)
        entries = (
            self.url_result(video_url, self.ie_key())
            for video_url in self._extract_embed_urls(url, webpage))
        return self.playlist_result(entries, playlist_id=display_id)

    def _extract_video(self, url, display_id):
        """Return the info dict for the video whose API slug is ``display_id``."""
        info = self._extract_video_info(self._checked_call_api(display_id))
        return merge_dicts({'display_id': display_id}, info)
181
182
class IGNVideoIE(IGNBaseIE):
    """Extractor for regional IGN video/trailer pages, resolved via their ``/embed`` page."""
    _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
    _TESTS = [{
        'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
        'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
        'info_dict': {
            'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
            'ext': 'mp4',
            'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
            'description': 'Taking out assassination targets in Hitman has never been more stylish.',
            'timestamp': 1444665600,
            'upload_date': '20151012',
            'display_id': '112203',
            'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
            'duration': 298,
            'tags': 'count:13',
        },
        'expected_warnings': ['HTTP Error 400: Bad Request'],
    }, {
        'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
        'only_matching': True,
    }, {
        # Twitter embed
        'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
        'only_matching': True,
    }, {
        # Vimeo embed
        'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page's embed URL and extract or delegate accordingly.

        Raises:
            ExtractorError: if the embed page has no video element and its
                final URL equals the input URL (would recurse forever).
        """
        video_id = self._match_id(url)
        parsed_url = urllib.parse.urlparse(url)
        # Swap the last path segment for 'embed'
        embed_url = urllib.parse.urlunparse(
            parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))

        webpage, urlh = self._download_webpage_handle(embed_url, video_id)
        new_url = urlh.geturl()
        # A 'url' query parameter on the final URL is handed to IGNIE
        ign_url = compat_parse_qs(
            urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
        if ign_url:
            return self.url_result(ign_url, IGNIE.ie_key())
        video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
        if not video:
            if new_url == url:
                raise ExtractorError('Redirect loop: ' + url)
            # No IGN player markup: delegate the final URL to whichever extractor matches it
            return self.url_result(new_url)
        video = extract_attributes(video)
        video_data = video.get('data-settings') or '{}'
        video_data = self._parse_json(video_data, video_id)['video']
        info = self._extract_video_info(video_data)

        return merge_dicts({
            'display_id': video_id,
        }, info)
adccf336 247
adccf336 248
class IGNArticleIE(IGNBaseIE):
    """Extractor for IGN article/feature pages; yields the videos they embed as a playlist."""
    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
    _PAGE_TYPE = 'article'
    _TESTS = [{
        'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
        'info_dict': {
            'id': '72113',
            'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': '5ebbd138523268b93c9141af17bec937',
                    'ext': 'mp4',
                    'title': 'Grand Theft Auto V Video Review',
                    'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    'timestamp': 1379339880,
                    'upload_date': '20130916',
                    'tags': 'count:12',
                    'thumbnail': 'https://assets1.ignimgs.com/thumbs/userUploaded/2021/8/16/gta-v-heistsjpg-e94705-1629138553533.jpeg',
                    'display_id': 'grand-theft-auto-v-video-review',
                    'duration': 501,
                },
            },
            {
                'info_dict': {
                    'id': '638672ee848ae4ff108df2a296418ee2',
                    'ext': 'mp4',
                    'title': 'GTA 5 In Slow Motion',
                    'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    'timestamp': 1386878820,
                    'upload_date': '20131212',
                    'duration': 202,
                    'tags': 'count:25',
                    'display_id': 'gta-5-in-slow-motion',
                    'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2013/11/03/GTA-SLO-MO-1.jpg',
                },
            },
        ],
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Backend fetch failed'],
    }, {
        'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
        'info_dict': {
            'id': '53ee806780a81ec46e0790f8',
            'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
        },
        'playlist_count': 1,
        'expected_warnings': ['Backend fetch failed'],
    }, {
        # videoId pattern
        'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
        'only_matching': True,
    }, {
        # IMDB embed
        'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
        'only_matching': True,
    }, {
        # Facebook embed
        'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
        'only_matching': True,
    }, {
        # Brightcove embed
        'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
        'only_matching': True,
    }]

    def _checked_call_api(self, slug):
        """Call the API; raise an expected error on HTTP 404, and on HTTP 503
        emit a warning and return ``None``. Other errors are re-raised."""
        try:
            return self._call_api(slug)
        except ExtractorError as e:
            http_error = e.cause
            if not isinstance(http_error, urllib.error.HTTPError):
                raise
            # Populate empty args so the error carries URL, status and reason
            http_error.args = http_error.args or [
                http_error.geturl(), http_error.getcode(), http_error.reason]
            if http_error.code == 404:
                raise ExtractorError(
                    'Content not found: expired?', cause=http_error,
                    expected=True)
            if http_error.code == 503:
                self.report_warning(error_to_compat_str(http_error))
                return
            raise

    def _real_extract(self, url):
        """Return a playlist of the videos referenced by the article.

        Uses the API document when one is returned; otherwise falls back to
        scraping the article web page.
        """
        display_id = self._match_id(url)
        article = self._checked_call_api(display_id)

        if article:
            # obsolete ?
            def api_entries():
                lead_url = traverse_obj(
                    article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
                    expected_type=url_or_none)
                if lead_url:
                    yield self.url_result(lead_url, IGNIE.ie_key())
                embed_re = r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"'
                for chunk in article.get('content') or []:
                    for embedded in re.findall(embed_re, chunk):
                        if not url_or_none(embedded):
                            continue
                        yield self.url_result(embedded)

            headline = traverse_obj(
                article, ('metadata', 'headline'),
                expected_type=lambda x: x.strip() or None)
            return self.playlist_result(
                api_entries(), article.get('articleId'), headline)

        webpage = self._download_webpage(url, display_id)

        playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
        if playlist_id:

            def page_entries():
                player_re = r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object'''
                for player in re.finditer(player_re, webpage):
                    param_tag = self._search_regex(
                        r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
                        player.group('params'), 'flashvars', default='')
                    flashvars = compat_parse_qs(
                        extract_attributes(param_tag).get('value') or '')
                    v_url = url_or_none((flashvars.get('url') or [None])[-1])
                    if v_url:
                        yield self.url_result(v_url)
        else:
            playlist_id = self._search_regex(
                r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
                webpage, 'id', group='id', default=None)

            nextjs_data = self._search_nextjs_data(webpage, display_id)

            def page_entries():
                state_path = ('props', 'apolloState')
                player_refs = traverse_obj(
                    nextjs_data,
                    state_path + ('ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref'))
                for ref in player_refs:
                    # skip promo links (which may not always be served, eg GH CI servers)
                    promo = traverse_obj(
                        nextjs_data,
                        state_path + (ref.replace('PlayerProps', 'ModernContent'), ),
                        expected_type=dict)
                    if promo:
                        continue
                    video = traverse_obj(
                        nextjs_data, state_path + (ref, ), expected_type=dict) or {}
                    info = self._extract_video_info(video, fatal=False)
                    if not info:
                        continue
                    yield merge_dicts({'display_id': display_id}, info)

        og_title = self._og_search_title(webpage, default='')
        # Strip the trailing " - IGN" site suffix; an empty title becomes None
        playlist_title = re.sub(r'\s+-\s+IGN\s*$', '', og_title) or None
        return self.playlist_result(
            page_entries(), playlist_id or display_id, playlist_title)