]>
Commit | Line | Data |
---|---|---|
f096ec26 RA |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
a206ef62 RA |
4 | import re |
5 | ||
f096ec26 RA |
6 | from .common import InfoExtractor |
7 | from ..utils import ( | |
8 | int_or_none, | |
9 | urlencode_postdata, | |
10 | compat_str, | |
11 | ExtractorError, | |
12 | ) | |
13 | ||
14 | ||
class CuriosityStreamBaseIE(InfoExtractor):
    """Shared login and API helpers for the CuriosityStream extractors."""

    _NETRC_MACHINE = 'curiositystream'
    # Set by _real_initialize() after a successful login; when set,
    # _call_api() sends it in the X-Auth-Token header.
    _auth_token = None
    _API_BASE_URL = 'https://api.curiositystream.com/v1/'

    def _handle_errors(self, result):
        """Raise ExtractorError if the decoded API response reports an error.

        The message is read from ``result['error']['message']``.  It may be a
        plain string or a dict, in which case the dict values are joined
        with ', '.  Nothing happens when no (non-empty) message is present.
        """
        # ``or {}`` also covers a response with an explicit ``"error": null``;
        # the ``.get('error', {})`` default only applies to a missing key.
        error = (result.get('error') or {}).get('message')
        if not error:
            return
        if isinstance(error, dict):
            # Stringify each value so a non-string value cannot make the
            # join itself fail and hide the server's message.
            error = ', '.join(compat_str(value) for value in error.values())
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, error), expected=True)

    def _call_api(self, path, video_id):
        """Fetch ``_API_BASE_URL + path`` as JSON and return its 'data' member.

        ``video_id`` is passed through to ``_download_json``.  Raises
        ExtractorError (via ``_handle_errors``) when the response reports
        an error.
        """
        headers = {}
        if self._auth_token:
            headers['X-Auth-Token'] = self._auth_token
        result = self._download_json(
            self._API_BASE_URL + path, video_id, headers=headers)
        self._handle_errors(result)
        return result['data']

    def _real_initialize(self):
        """Log in and remember the auth token, if credentials are available."""
        email, password = self._get_login_info()
        if email is None:
            # No credentials: continue without an auth token.
            return
        result = self._download_json(
            self._API_BASE_URL + 'login', None, data=urlencode_postdata({
                'email': email,
                'password': password,
            }))
        self._handle_errors(result)
        self._auth_token = result['message']['auth_token']
f096ec26 | 48 | |
3b983ee4 RA |
49 | |
class CuriosityStreamIE(CuriosityStreamBaseIE):
    """Extractor for a single CuriosityStream video page (``/video/<id>``)."""

    IE_NAME = 'curiositystream'
    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/video/2',
        'md5': '262bb2f257ff301115f1973540de8983',
        'info_dict': {
            'id': '2',
            'ext': 'mp4',
            'title': 'How Did You Develop The Internet?',
            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
        }
    }

    def _real_extract(self, url):
        """Return the info dict (formats, subtitles, metadata) for one video.

        Media metadata is fetched from the ``media/<id>`` API endpoint.
        'title' is mandatory (a missing key raises KeyError); every other
        field is read with ``.get`` and may be absent.
        """
        video_id = self._match_id(url)
        media = self._call_api('media/' + video_id, video_id)
        title = media['title']

        formats = []
        # ``or []`` tolerates a key that is present but null, which the
        # ``.get(key, [])`` default alone does not.
        for encoding in media.get('encodings') or []:
            m3u8_url = encoding.get('master_playlist_url')
            if m3u8_url:
                # Non-fatal: a broken playlist must not discard the
                # direct URLs handled below.
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            encoding_url = encoding.get('url')
            file_url = encoding.get('file_url')
            if not encoding_url and not file_url:
                continue
            # Properties shared by every direct URL of this encoding.
            f = {
                'width': int_or_none(encoding.get('width')),
                'height': int_or_none(encoding.get('height')),
                'vbr': int_or_none(encoding.get('video_bitrate')),
                'abr': int_or_none(encoding.get('audio_bitrate')),
                'filesize': int_or_none(encoding.get('size_in_bytes')),
                'vcodec': encoding.get('video_codec'),
                'acodec': encoding.get('audio_codec'),
                'container': encoding.get('container_type'),
            }
            # 'url' and 'file_url' may hold the same address; emit it once.
            seen_urls = set()
            for f_url in (encoding_url, file_url):
                if not f_url or f_url in seen_urls:
                    continue
                seen_urls.add(f_url)
                fmt = f.copy()
                # Split an RTMP(E) address into connection URL, app and
                # play path; anything else is treated as a plain HTTP URL.
                rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
                if rtmp:
                    fmt.update({
                        'url': rtmp.group('url'),
                        'play_path': rtmp.group('playpath'),
                        'app': rtmp.group('app'),
                        'ext': 'flv',
                        'format_id': 'rtmp',
                    })
                else:
                    fmt.update({
                        'url': f_url,
                        'format_id': 'http',
                    })
                formats.append(fmt)
        self._sort_formats(formats)

        subtitles = {}
        for closed_caption in media.get('closed_captions') or []:
            sub_url = closed_caption.get('file')
            if not sub_url:
                continue
            # Fall back to 'en' when neither a code nor a language is given.
            lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
            subtitles.setdefault(lang, []).append({
                'url': sub_url,
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': media.get('description'),
            # Prefer the largest available image.
            'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
            'duration': int_or_none(media.get('duration')),
            'tags': media.get('tags'),
            'subtitles': subtitles,
        }
131 | ||
132 | ||
f096ec26 RA |
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
    """Extractor for CuriosityStream collection/series pages as playlists."""

    IE_NAME = 'curiositystream:collection'
    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://app.curiositystream.com/collection/2',
        'info_dict': {
            'id': '2',
            'title': 'Curious Minds: The Internet',
            'description': 'How is the internet shaping our lives in the 21st Century?',
        },
        'playlist_mincount': 17,
    }, {
        'url': 'https://curiositystream.com/series/2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist with one URL entry per media item.

        The collection is fetched from the ``collections/<id>`` API endpoint;
        each entry is delegated to CuriosityStreamIE through its video URL.
        """
        collection_id = self._match_id(url)
        collection = self._call_api(
            'collections/' + collection_id, collection_id)
        entries = []
        # ``or []`` tolerates a 'media' key that is present but null.
        for media in collection.get('media') or []:
            raw_id = media.get('id')
            if raw_id is None:
                # Without an id, compat_str() would yield 'None' and produce
                # a bogus '.../video/None' entry; skip such items instead.
                continue
            media_id = compat_str(raw_id)
            entries.append(self.url_result(
                'https://curiositystream.com/video/' + media_id,
                CuriosityStreamIE.ie_key(), media_id))
        return self.playlist_result(
            entries, collection_id,
            collection.get('title'), collection.get('description'))