]>
Commit | Line | Data |
---|---|---|
223b27f4 NJ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import json | |
5 | ||
6 | from .common import InfoExtractor | |
223b27f4 NJ |
7 | from ..utils import ( |
8 | ExtractorError, | |
9 | parse_iso8601, | |
5c2266df | 10 | sanitized_Request, |
223b27f4 NJ |
11 | ) |
12 | ||
13 | ||
class VesselIE(InfoExtractor):
    """Information extractor for vessel.com video pages.

    The extractor reads the bootstrap JSON embedded in the video page to
    obtain the item id, queries the item API for its metadata and assets,
    and builds the format and thumbnail lists from those assets.  When
    login credentials are available, a login request is sent during
    initialization.
    """

    _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
    # Filled in with the item id taken from the page's bootstrap data.
    _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
    _LOGIN_URL = 'https://www.vessel.com/api/account/login'
    _NETRC_MACHINE = 'vessel'
    _TEST = {
        'url': 'https://www.vessel.com/videos/HDN7G5UMs',
        'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
        'info_dict': {
            'id': 'HDN7G5UMs',
            'ext': 'mp4',
            'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150317',
            'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
            'timestamp': int,
        },
    }

    @staticmethod
    def make_json_request(url, data):
        """Return a request for ``url`` carrying ``data`` as a JSON body.

        ``data`` is serialized with ``json.dumps`` and encoded as UTF-8; the
        request is tagged with a matching JSON ``Content-Type`` header.
        """
        payload = json.dumps(data).encode('utf-8')
        req = sanitized_Request(url, payload)
        req.add_header('Content-Type', 'application/json; charset=utf-8')
        return req

    @staticmethod
    def find_assets(data, asset_type, asset_id=None):
        """Yield the entries of ``data['assets']`` matching the given filters.

        An asset matches when its ``type`` equals ``asset_type`` and, if
        ``asset_id`` is not None, its ``id`` equals ``asset_id``.  A missing
        or null ``assets`` value yields nothing.
        """
        # ``or []`` also covers an explicit JSON null, which a plain
        # ``.get('assets', [])`` would pass through as None.
        for asset in data.get('assets') or []:
            if asset.get('type') != asset_type:
                continue
            if asset_id is not None and asset.get('id') != asset_id:
                continue
            yield asset

    def _check_access_rights(self, data):
        """Raise ExtractorError if ``data`` says access is not allowed.

        Access is treated as granted unless ``data['__view']['allow_access']``
        is present and falsy.  The ``ITEM_PAID_ONLY`` error code gets a
        dedicated message; any other code is reported verbatim.
        """
        access_info = data.get('__view') or {}
        if access_info.get('allow_access', True):
            return

        err_code = access_info.get('error_code') or ''
        if err_code == 'ITEM_PAID_ONLY':
            raise ExtractorError(
                'This video requires subscription.', expected=True)
        raise ExtractorError(
            'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)

    def _login(self):
        """Send a password login request if credentials are available.

        Does nothing when no username is configured.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return
        self.report_login()
        data = {
            'client_id': 'web',
            'type': 'password',
            'user_key': username,
            'password': password,
        }
        login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
        # NOTE(review): the positional arguments presumably map to
        # (video_id, note, errnote) -- confirm against InfoExtractor.
        self._download_webpage(login_request, None, False, 'Wrong login info')

    def _real_initialize(self):
        """Log in (when credentials exist) before any extraction happens."""
        self._login()

    def _real_extract(self, url):
        """Extract metadata, formats and thumbnails for the video at ``url``.

        Raises ExtractorError when access is denied or when the API response
        contains no matching video asset.
        """
        video_id = self._match_id(url)

        # The page embeds its state as a JSON argument to App.bootstrapData().
        webpage = self._download_webpage(url, video_id)
        data = self._parse_json(self._search_regex(
            r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
        asset_id = data['model']['data']['id']

        req = VesselIE.make_json_request(
            self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
        data = self._download_json(req, video_id)
        video_asset_id = data.get('main_video_asset')

        self._check_access_rights(data)

        # Take the first video asset; when the API names a main video asset,
        # only the asset with that id qualifies.
        video_asset = next(
            VesselIE.find_assets(data, 'video', asset_id=video_asset_id),
            None)
        if video_asset is None:
            raise ExtractorError('No video assets found')

        formats = []
        for f in video_asset.get('sources') or []:
            location = f.get('location')
            if not location:
                # A source without a URL cannot be downloaded; skip it
                # instead of aborting the whole extraction.
                continue
            name = f.get('name')
            if name == 'hls-index':
                formats.extend(self._extract_m3u8_formats(
                    location, video_id, ext='mp4', m3u8_id='m3u8'))
            else:
                formats.append({
                    'format_id': name,
                    'tbr': f.get('bitrate'),
                    'height': f.get('height'),
                    'width': f.get('width'),
                    'url': location,
                })
        self._sort_formats(formats)

        thumbnails = []
        for im_asset in VesselIE.find_assets(data, 'image'):
            thumbnail_url = im_asset.get('location')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': im_asset.get('width', 0),
                'height': im_asset.get('height', 0),
            })

        return {
            'id': video_id,
            'title': data['title'],
            'formats': formats,
            'thumbnails': thumbnails,
            'description': data.get('short_description'),
            'duration': data.get('duration'),
            'comment_count': data.get('comment_count'),
            'like_count': data.get('like_count'),
            'view_count': data.get('view_count'),
            'timestamp': parse_iso8601(data.get('released_at')),
        }