]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/kinja.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / kinja.py
CommitLineData
55adb63e
RA
1from .common import InfoExtractor
2from ..compat import (
3 compat_str,
4 compat_urllib_parse_unquote,
5)
6from ..utils import (
7 int_or_none,
8 parse_iso8601,
9 strip_or_none,
10 try_get,
55adb63e
RA
11)
12
13
class KinjaEmbedIE(InfoExtractor):
    """Extractor for media embedded through Kinja ``iframe`` insets.

    The ``id`` query parameter of a matching URL has the form
    ``<type>-<id>``.  Types listed in ``_PROVIDER_MAP`` are rewritten into a
    URL of the corresponding third-party site and delegated with
    ``url_result``.  The ``kinjavideo`` type is resolved through the Kinja
    JSON API, and the remaining accepted type (``mcp``) through the Univision
    metadata/auth JSON APIs.
    """

    # NOTE(review): this attribute was previously spelled ``IENAME`` only.
    # The extractor framework is expected to read ``IE_NAME`` (confirm against
    # InfoExtractor), so the intended name was presumably never picked up.
    IE_NAME = 'kinja:embed'
    # Kept so that any code referring to the old misspelled attribute still works.
    IENAME = IE_NAME

    # Both fragments below are only ever embedded in ``(?x)`` (verbose)
    # patterns, so the whitespace/newlines inside them are not significant.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    # Captures exactly two groups: ``type`` (embed kind) and ``id`` (the rest
    # of the ``id`` query value up to the next ``&``).
    _VALID_URL = r'''(?x)https?://%s%s
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            ooyala|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
    # Matches <iframe src=...> whose URL is either absolute on a Kinja domain
    # or starts directly with the common inset path.
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1']
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    # embed type -> (scheme-less URL prefix or %-template, extractor key
    # passed to ``url_result``)
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    def _real_extract(self, url):
        """Resolve a Kinja embed URL.

        Returns a ``url_result`` for third-party providers, otherwise an info
        dict built from the Kinja (``kinjavideo``) or Univision (any other
        accepted type, i.e. ``mcp``) APIs.
        """
        video_type, video_id = self._match_valid_url(url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            url_template, ie_key = provider
            result_url = self._build_provider_url(
                video_type, url_template, compat_urllib_parse_unquote(video_id))
            return self.url_result('http://' + result_url, ie_key)

        if video_type == 'kinjavideo':
            return self._extract_kinja_video(video_id)
        return self._extract_mcp_video(video_id)

    @staticmethod
    def _build_provider_url(video_type, url_template, video_id):
        """Build the scheme-less third-party URL for an (unquoted) embed id.

        Raises ``ValueError`` when a ``tumblr-post`` id has no ``-`` or a
        ``youtube-list`` id does not contain exactly one ``/``.
        """
        if video_type == 'tumblr-post':
            # id is "<post id>-<blog name>"; the blog name may itself contain "-"
            post_id, blog = video_id.split('-', 1)
            return url_template % (blog, post_id)
        if video_type == 'youtube-list':
            # id is "<video id>/<playlist id>"
            video_id, playlist_id = video_id.split('/')
            return url_template % (video_id, playlist_id)
        if video_type == 'ooyala':
            # only the part before the first "/" is used as the embed code
            video_id = video_id.split('/')[0]
        return url_template + video_id

    def _extract_kinja_video(self, video_id):
        """Build the info dict for a ``kinjavideo`` id from the Kinja API."""
        data = self._download_json(
            'https://kinja.com/api/core/video/views/videoById',
            video_id, query={'videoId': video_id})['data']
        title = data['title']

        formats = []
        for url_key in ('signedPlaylistUrl', 'streamingUrl'):
            m3u8_url = data.get(url_key)
            if not m3u8_url:
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        poster = data.get('poster') or {}
        poster_id = poster.get('id')
        thumbnail = None
        if poster_id:
            poster_ext = poster.get('format') or 'jpg'
            thumbnail = f'https://i.kinja-img.com/gawker-media/image/upload/{poster_id}.{poster_ext}'

        # presumably the 1000 scale turns the millisecond value into seconds
        timestamp = int_or_none(try_get(
            data, lambda x: x['postInfo']['publishTimeMillis']), 1000)

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'formats': formats,
            'tags': data.get('tags'),
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'uploader': data.get('network'),
        }

    def _extract_mcp_video(self, video_id):
        """Build the info dict for an ``mcp`` id from the Univision APIs."""
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]

        formats = []
        rendition_url = data.get('renditionUrl')
        if rendition_url:
            formats.extend(self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))

        fallback_rendition_url = data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            formats.append({
                'format_id': 'fallback',
                # bitrate is taken from a trailing "_<digits>.mp4" in the URL, if any
                'tbr': int_or_none(self._search_regex(
                    r'_(\d+)\.mp4', fallback_rendition_url,
                    'bitrate', default=None)),
                'url': fallback_rendition_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }