]>
Commit | Line | Data |
---|---|---|
55adb63e RA |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..compat import ( | |
8 | compat_str, | |
9 | compat_urllib_parse_unquote, | |
10 | ) | |
11 | from ..utils import ( | |
12 | int_or_none, | |
13 | parse_iso8601, | |
14 | strip_or_none, | |
15 | try_get, | |
16 | unescapeHTML, | |
17 | urljoin, | |
18 | ) | |
19 | ||
20 | ||
class KinjaEmbedIE(InfoExtractor):
    """Extractor for iframe embeds served by Kinja network sites.

    The embed URL carries an ``id`` query parameter of the form
    ``<type>-<id>``.  Types listed in ``_PROVIDER_MAP`` are rewritten to a
    URL of the embedded third-party service and handed to that service's
    extractor; ``kinjavideo`` and ``mcp`` are extracted here.
    """

    # The extractor framework looks the display name up under ``IE_NAME``;
    # the earlier ``IENAME`` spelling was a typo and is kept only as an alias
    # so that any external reference to it keeps working.
    IE_NAME = 'kinja:embed'
    IENAME = IE_NAME
    # Both fragments below contain layout whitespace and are only valid when
    # compiled in verbose mode (``(?x)``).
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = r'''(?x)https?://%s%s
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            ooyala|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    # Each provider entry is (URL without scheme, extractor key).  Entries
    # whose URL contains ``%s`` placeholders are filled in by
    # ``_delegate_to_provider``; all others get the id appended.
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    @staticmethod
    def _extract_urls(webpage, url):
        """Return the Kinja embed iframe URLs found in *webpage*.

        Each ``src`` value is HTML-unescaped and resolved against *url*, so
        host-less and scheme-relative sources come back joined with the page
        URL.
        """
        # The domain part is optional so that relative ``src`` values match;
        # ``\1`` is the opening quote captured by the ``q`` group.
        iframe_re = (
            r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1'
            % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX))
        return [
            urljoin(url, unescapeHTML(mobj.group('url')))
            for mobj in re.finditer(iframe_re, webpage)]

    def _real_extract(self, url):
        """Resolve a Kinja embed URL.

        Returns a ``url_result`` pointing at the third-party extractor when
        the embed type is in ``_PROVIDER_MAP``; otherwise returns an info
        dict built from the Kinja video API (``kinjavideo``) or from the
        Univision metadata API (any remaining type, i.e. ``mcp``).
        """
        video_type, video_id = self._match_valid_url(url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            return self._delegate_to_provider(provider, video_type, video_id)
        if video_type == 'kinjavideo':
            return self._extract_kinjavideo(video_id)
        return self._extract_mcp(video_id)

    def _delegate_to_provider(self, provider, video_type, video_id):
        """Build the third-party URL for *video_id* and hand it to that extractor."""
        url_template, ie_key = provider
        video_id = compat_urllib_parse_unquote(video_id)
        if video_type == 'tumblr-post':
            # The id has the form "<post id>-<blog name>".
            video_id, blog = video_id.split('-', 1)
            result_url = url_template % (blog, video_id)
        elif video_type == 'youtube-list':
            # The id has the form "<video id>/<playlist id>".
            video_id, playlist_id = video_id.split('/')
            result_url = url_template % (video_id, playlist_id)
        else:
            if video_type == 'ooyala':
                # Only the part before the first "/" is used as embed code.
                video_id = video_id.split('/')[0]
            result_url = url_template + video_id
        return self.url_result('http://' + result_url, ie_key)

    def _extract_kinjavideo(self, video_id):
        """Return the info dict for a video hosted by Kinja itself."""
        data = self._download_json(
            'https://kinja.com/api/core/video/views/videoById',
            video_id, query={'videoId': video_id})['data']
        title = data['title']

        formats = []
        for k in ('signedPlaylist', 'streaming'):
            m3u8_url = data.get(k + 'Url')
            if m3u8_url:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        thumbnail = None
        poster = data.get('poster') or {}
        poster_id = poster.get('id')
        if poster_id:
            thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'formats': formats,
            'tags': data.get('tags'),
            # The second argument to int_or_none is passed through unchanged;
            # presumably it scales milliseconds down to seconds -- confirm
            # against the helper's signature.
            'timestamp': int_or_none(try_get(
                data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
            'thumbnail': thumbnail,
            'uploader': data.get('network'),
        }

    def _extract_mcp(self, video_id):
        """Return the info dict for an ``mcp`` video via the Univision APIs."""
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]
        formats = []

        rendition_url = data.get('renditionUrl')
        if rendition_url:
            formats = self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)

        fallback_rendition_url = data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            formats.append({
                'format_id': 'fallback',
                # The bitrate is read from a "_<digits>.mp4" suffix if present.
                'tbr': int_or_none(self._search_regex(
                    r'_(\d+)\.mp4', fallback_rendition_url,
                    'bitrate', default=None)),
                'url': fallback_rendition_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }