]>
Commit | Line | Data |
---|---|---|
9bb8e0a3 PH |
1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | int_or_none, | |
7 | unified_strdate, | |
8 | ) | |
9 | ||
10 | ||
class WSJIE(InfoExtractor):
    """Information extractor for the Wall Street Journal video API player.

    Matches the iframe player URL, takes the video GUID from it and asks
    the ``find_all_videos.asp`` endpoint for the metadata and media URLs
    of that single video.
    """

    _VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
    IE_DESC = 'Wall Street Journal'
    _TEST = {
        'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
        'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
        'info_dict': {
            'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
            'ext': 'mp4',
            'upload_date': '20150202',
            'uploader_id': 'jdesai',
            'creator': 'jdesai',
            'categories': list,  # a long list
            'duration': 90,
            'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
        },
    }

    # Bitrates for which a ``video<N>kMP4Url`` field is requested from the API.
    _BITRATES = (128, 174, 264, 320, 464, 664, 1264)

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The GUID captured by ``_VALID_URL`` is used both as the video id
        and as the query of the API request.  Formats are collected from
        up to three sources in the response: the ``videoURL`` entry
        (f4m meta URL), the ``hls`` playlist and one direct MP4 URL per
        bitrate in ``_BITRATES``.  Each source is optional; a missing or
        empty field is skipped.

        Raises whatever ``_download_json`` raises, and ``KeyError`` /
        ``IndexError`` when the response has no ``items`` entry.
        """
        video_id = self._match_id(url)

        mp4_fields = ['video%dkMP4Url' % br for br in self._BITRATES]
        api_url = (
            'http://video-api.wsj.com/api-video/find_all_videos.asp?'
            'type=guid&count=1&query=%s&'
            'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
            'author,description,name,linkURL,videoStillURL,duration,videoURL,'
            'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
            'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
            'allthingsd-subsection,sm-section,sm-subsection,provider,'
            'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
            'emailURL,emailPartnerID,showName,omnitureProgramName,'
            'omnitureVideoFormat,linkRelativeURL,touchCastID,'
            'omniturePublishDate,%s') % (video_id, ','.join(mp4_fields))
        info = self._download_json(api_url, video_id)['items'][0]

        # Thumbnails are conveniently in the correct format already
        thumbnails = info.get('thumbnailList')
        creator = info.get('author')
        uploader_id = info.get('editor')
        categories = info.get('keywords')
        duration = int_or_none(info.get('duration'))
        upload_date = unified_strdate(
            info.get('formattedCreationDate'), day_first=False)
        # Fall back to the title tag when "name" is absent, null or empty.
        title = info.get('name') or info.get('titletag')

        formats = []
        # "videoURL" is only one of several sources; do not fail when the
        # API omits it, the HLS/MP4 entries below may still be usable.
        f4m_url = info.get('videoURL')
        if f4m_url:
            formats.append({
                'format_id': 'f4m',
                'format_note': 'f4m (meta URL)',
                'url': f4m_url,
            })
        hls_url = info.get('hls')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, ext='mp4',
                preference=0, entry_protocol='m3u8_native'))
        for br, field in zip(self._BITRATES, mp4_fields):
            mp4_url = info.get(field)
            if not mp4_url:
                continue
            formats.append({
                'format_id': 'mp4-%d' % br,
                'container': 'mp4',
                'tbr': br,
                'url': mp4_url,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'creator': creator,
            'uploader_id': uploader_id,
            'duration': duration,
            'upload_date': upload_date,
            'categories': categories,
        }