]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/vice.py
[skip travis] renaming
[yt-dlp.git] / youtube_dlc / extractor / vice.py
CommitLineData
22979993 1# coding: utf-8
1fe8fb8c 2from __future__ import unicode_literals
1fe8fb8c 3
44b434e4 4import functools
b811b4c9
RA
5import hashlib
6import json
86c8cfc5 7import random
44b434e4
RA
8import re
9import time
0a477f87 10
b811b4c9 11from .adobepass import AdobePassIE
1fe8fb8c 12from .common import InfoExtractor
44b434e4 13from .youtube import YoutubeIE
86c8cfc5
S
14from ..compat import (
15 compat_HTTPError,
16 compat_str,
17)
b811b4c9 18from ..utils import (
44b434e4 19 clean_html,
86c8cfc5 20 ExtractorError,
b811b4c9 21 int_or_none,
44b434e4 22 OnDemandPagedList,
b811b4c9
RA
23 parse_age_limit,
24 str_or_none,
86c8cfc5 25 try_get,
b811b4c9 26)
1fe8fb8c
JMF
27
28
44b434e4
RA
class ViceBaseIE(InfoExtractor):
    """Common base for the Vice extractors: a thin wrapper around the GraphQL API."""

    def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
        """Run a single-resource GraphQL query and return that resource's payload.

        resource      -- name of the GraphQL field to query (also the key read
                         back from the response's ``data`` object)
        resource_key  -- name of the argument used to select the resource
        resource_id   -- value for ``resource_key``; also passed to
                         ``_download_json`` as the id of the download
        locale        -- value of the ``locale`` argument
        fields        -- selection set placed inside the resource braces
        args          -- extra raw argument text appended after the selector
                         (callers include the leading ``, `` themselves)
        """
        # All values are interpolated verbatim; nothing is escaped here.
        graphql_query = '''{
  %s(locale: "%s", %s: "%s"%s) {
    %s
  }
}''' % (resource, locale, resource_key, resource_id, args, fields)

        response = self._download_json(
            'https://video.vice.com/api/v1/graphql', resource_id,
            query={'query': graphql_query})
        return response['data'][resource]
39
40
class ViceIE(ViceBaseIE, AdobePassIE):
    """Extractor for single videos and embeds on video.vice.com, vms.vice.com,
    viceland.com and vicetv.com (see ``_VALID_URL``)."""

    IE_NAME = 'vice'
    _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
    _TESTS = [{
        'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
        'info_dict': {
            'id': '58c69e38a55424f1227dc3f7',
            'ext': 'mp4',
            'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
            'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1489664942,
            'upload_date': '20170316',
            'age_limit': 14,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # geo restricted to US
        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
        'info_dict': {
            'id': '5816510690b70e6c5fd39a56',
            'ext': 'mp4',
            'uploader': 'vice',
            'title': 'The Signal From Tölva',
            'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1477941983,
            'upload_date': '20161031',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
        'info_dict': {
            'id': '581b12b60a0e1f4c0fb6ea2f',
            'ext': 'mp4',
            'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
            'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1485368119,
            'upload_date': '20170125',
            'age_limit': 14,
        },
        'params': {
            # AES-encrypted m3u8
            'skip_download': True,
        },
    }, {
        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
        'only_matching': True,
    }, {
        'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
        'only_matching': True,
    }, {
        'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
        'only_matching': True,
    }, {
        'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every video.vice.com embed iframe in ``webpage``."""
        return re.findall(
            r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
            webpage)

    @staticmethod
    def _extract_url(webpage):
        """Return the first embed URL found by ``_extract_urls``, or None."""
        urls = ViceIE._extract_urls(webpage)
        return urls[0] if urls else None

    @staticmethod
    def _preplay_error_message(http_error):
        """Return the message carried by a preplay error response, or None.

        Reads the body of ``http_error`` and looks for ``error_description``
        and then ``details``. Returns None when the body is not valid UTF-8
        JSON, is not a JSON object, or has neither key, so the caller can
        fall back to re-raising the original HTTP error.
        """
        try:
            # UnicodeDecodeError and JSON parse errors are both ValueErrors.
            error = json.loads(http_error.read().decode('utf-8'))
        except ValueError:
            return None
        if not isinstance(error, dict):
            return None
        return error.get('error_description') or error.get('details')

    def _real_extract(self, url):
        """Build the info dict for a single Vice video URL.

        Metadata comes from the GraphQL API; formats, subtitles and the rest
        of the metadata come from the vms.vice.com preplay endpoint.

        Raises ExtractorError when the API returns no video for the id, or
        when preplay answers HTTP 400/401 with a readable error message.
        """
        locale, video_id = re.match(self._VALID_URL, url).groups()

        videos = self._call_api('videos', 'id', video_id, locale, '''body
    locked
    rating
    thumbnail_url
    title''')
        # An empty/null result would otherwise surface as a bare IndexError.
        if not videos:
            raise ExtractorError(
                'Video %s not found' % video_id, expected=True)
        video = videos[0]
        title = video['title'].strip()
        rating = video.get('rating')

        query = {}
        if video.get('locked'):
            # Locked videos are requested with an MVPD auth token (`tvetoken`).
            resource = self._get_mvpd_resource(
                'VICELAND', title, video_id, rating)
            query['tvetoken'] = self._extract_mvpd_auth(
                url, video_id, 'VICELAND', resource)

        # signature generation algorithm is reverse engineered from signatureGenerator in
        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
        # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
        exp = int(time.time()) + 1440

        query.update({
            'exp': exp,
            # SHA-512 hex digest of "<video_id>:GET:<exp>"
            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
            'skipadstitching': 1,
            'platform': 'desktop',
            'rn': random.randint(10000, 100000),
        })

        try:
            preplay = self._download_json(
                'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
                video_id, query=query)
        except ExtractorError as e:
            error_message = None
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
                error_message = self._preplay_error_message(e.cause)
            if not error_message:
                # Nothing usable in the response body: keep the original error.
                raise
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, error_message), expected=True)

        video_data = preplay['video']
        formats = self._extract_m3u8_formats(
            preplay['playURL'], video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)
        episode = video_data.get('episode') or {}
        channel = video_data.get('channel') or {}
        season = video_data.get('season') or {}

        subtitles = {}
        # `or []` also covers the key being present with a null value.
        for subtitle in preplay.get('subtitleURLs') or []:
            cc_url = subtitle.get('url')
            if not cc_url:
                continue
            # Subtitles without a language code are filed under 'en'.
            language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
            subtitles.setdefault(language_code, []).append({
                'url': cc_url,
            })

        return {
            'formats': formats,
            'id': video_id,
            'title': title,
            'description': clean_html(video.get('body')),
            'thumbnail': video.get('thumbnail_url'),
            'duration': int_or_none(video_data.get('video_duration')),
            # created_at is scaled by 1000 (presumably milliseconds -> seconds)
            'timestamp': int_or_none(video_data.get('created_at'), 1000),
            'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
            'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
            'episode_number': int_or_none(episode.get('episode_number')),
            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
            'season_number': int_or_none(season.get('season_number')),
            'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
            'uploader': channel.get('name'),
            'uploader_id': str_or_none(channel.get('id')),
            'subtitles': subtitles,
        }
200
201
class ViceShowIE(ViceBaseIE):
    """Extractor that turns a Vice show page into a paged playlist of its videos."""

    IE_NAME = 'vice:show'
    _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
    # Number of videos requested per API page.
    _PAGE_SIZE = 25
    _TESTS = [{
        'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
        'info_dict': {
            'id': '57a2040c8cb727dec794c901',
            'title': 'F*ck, That’s Delicious',
            'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
        'only_matching': True,
    }]

    def _fetch_page(self, locale, show_id, page):
        """Yield one url_result per video on the given zero-based ``page``.

        The API is queried with ``page + 1``, i.e. its page numbers are
        treated as one-based.
        """
        videos = self._call_api('videos', 'show_id', show_id, locale, '''body
    id
    url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE))
        # A null result is treated like an empty page instead of a TypeError.
        for video in videos or []:
            yield self.url_result(
                video['url'], ViceIE.ie_key(), video.get('id'))

    def _real_extract(self, url):
        """Look the show up by slug and return a lazily paged playlist.

        Raises ExtractorError when the API returns no show for the slug.
        """
        locale, display_id = re.match(self._VALID_URL, url).groups()
        shows = self._call_api('shows', 'slug', display_id, locale, '''dek
    id
    title''')
        # An empty/null result would otherwise surface as a bare IndexError.
        if not shows:
            raise ExtractorError(
                'Show %s not found' % display_id, expected=True)
        show = shows[0]
        show_id = show['id']

        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, locale, show_id),
            self._PAGE_SIZE)

        # The show's `dek` field is used as the playlist description.
        return self.playlist_result(
            entries, show_id, show.get('title'), show.get('dek'))
4ac6dc37
YCH
240
241
class ViceArticleIE(ViceBaseIE):
    """Extractor for vice.com articles; delegates to whichever player the
    article body embeds."""

    IE_NAME = 'vice:article'
    # `https?` for consistency with the other Vice extractors in this file.
    _VALID_URL = r'https?://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'

    _TESTS = [{
        'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
        'info_dict': {
            'id': '58dc0a3dee202d2a0ccfcbd8',
            'ext': 'mp4',
            'title': 'Mormon War on Porn',
            'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1491883129,
            'upload_date': '20170411',
            'age_limit': 17,
        },
        'params': {
            # AES-encrypted m3u8
            'skip_download': True,
        },
        'add_ie': [ViceIE.ie_key()],
    }, {
        'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
        'md5': '13010ee0bc694ea87ec40724397c2349',
        'info_dict': {
            'id': '3jstaBeXgAs',
            'ext': 'mp4',
            'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
            'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
            'uploader': 'Motherboard',
            'uploader_id': 'MotherboardTV',
            'upload_date': '20140529',
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
        'info_dict': {
            'id': '57f41d3556a0a80f54726060',
            'ext': 'mp4',
            'title': "Making The World's First Male Sex Doll",
            'description': 'md5:19b00b215b99961cf869c40fbe9df755',
            'uploader': 'vice',
            'uploader_id': '57a204088cb727dec794c67b',
            'timestamp': 1476919911,
            'upload_date': '20161019',
            'age_limit': 17,
        },
        'params': {
            'skip_download': True,
            'format': 'bestvideo',
        },
        'add_ie': [ViceIE.ie_key()],
    }, {
        'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
        'only_matching': True,
    }, {
        'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the video embedded in an article and hand it to its extractor.

        The article body is probed, in order, for a Vice embed iframe, an
        Ooyala embed code and a YouTube embed; failing those, the
        ``data-video-url`` attribute of the article's ``embed_code`` is used.

        Raises ExtractorError when the API returns no article for the slug or
        when none of the probes finds a video URL.
        """
        locale, display_id = re.match(self._VALID_URL, url).groups()

        articles = self._call_api('articles', 'slug', display_id, locale, '''body
    embed_code''')
        # An empty/null result would otherwise surface as a bare IndexError.
        if not articles:
            raise ExtractorError(
                'Article %s not found' % display_id, expected=True)
        article = articles[0]
        # Null fields become '' so the regex helpers below see a string.
        body = article.get('body') or ''

        def _url_res(video_url, ie_key):
            # url_transparent result that carries the article slug as display_id
            return {
                '_type': 'url_transparent',
                'url': video_url,
                'display_id': display_id,
                'ie_key': ie_key,
            }

        vice_url = ViceIE._extract_url(body)
        if vice_url:
            return _url_res(vice_url, ViceIE.ie_key())

        embed_code = self._search_regex(
            r'embedCode=([^&\'"]+)', body,
            'ooyala embed code', default=None)
        if embed_code:
            return _url_res('ooyala:%s' % embed_code, 'Ooyala')

        youtube_url = YoutubeIE._extract_url(body)
        if youtube_url:
            return _url_res(youtube_url, YoutubeIE.ie_key())

        # Last resort; no default is given, so a miss is reported by the helper.
        video_url = self._html_search_regex(
            r'data-video-url="([^"]+)"',
            article.get('embed_code') or '', 'video URL')

        return _url_res(video_url, ViceIE.ie_key())