]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
c060b774 PH |
2 | from __future__ import unicode_literals |
3 | ||
ae287755 PH |
4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
c678192a ZF |
7 | from ..utils import ( |
8 | ExtractorError, | |
9 | int_or_none, | |
c678192a ZF |
10 | urlencode_postdata |
11 | ) | |
ae287755 PH |
12 | |
13 | ||
class TumblrIE(InfoExtractor):
    """Extractor for videos attached to Tumblr posts.

    Posts that embed Tumblr's own video player are resolved to direct
    media URLs; any other post is handed over to the generic extractor
    (see the ``add_ie`` entries in ``_TESTS`` for embedded third parties).
    Credentials, when available, are used to log in beforehand.
    """

    # Captures the blog subdomain (``blog_name``) and numeric post ``id``.
    _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
    _NETRC_MACHINE = 'tumblr'
    _LOGIN_URL = 'https://www.tumblr.com/login'
    _TESTS = [{
        'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
        'md5': '479bb068e5b16462f5176a6828829767',
        'info_dict': {
            'id': '54196191430',
            'ext': 'mp4',
            'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
            'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
        'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
        'info_dict': {
            'id': '90208453769',
            'ext': 'mp4',
            'title': '5SOS STRUM ;]',
            'description': 'md5:dba62ac8639482759c8eb10ce474586a',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video',
        'md5': '7ae503065ad150122dc3089f8cf1546c',
        'info_dict': {
            'id': '130323439814',
            'ext': 'mp4',
            'title': 'HD Video Testing \u2014 Test description for my HD video',
            'description': 'md5:97cc3ab5fcd27ee4af6356701541319c',
            'thumbnail': r're:http://.*\.jpg',
        },
        'params': {
            'format': 'hd',
        },
    }, {
        'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
        'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
        'info_dict': {
            'id': 'Wmur',
            'ext': 'mp4',
            'title': 'naked smoking & stretching',
            'upload_date': '20150506',
            'timestamp': 1430931613,
            'age_limit': 18,
            'uploader_id': '1638622',
            'uploader': 'naked-yogi',
        },
        'add_ie': ['Vidme'],
    }, {
        'url': 'http://camdamage.tumblr.com/post/98846056295/',
        'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
        'info_dict': {
            'id': '105463834',
            'ext': 'mp4',
            'title': 'Cam Damage-HD 720p',
            'uploader': 'John Moyer',
            'uploader_id': 'user32021558',
        },
        'add_ie': ['Vimeo'],
    }, {
        'url': 'http://sutiblr.tumblr.com/post/139638707273',
        'md5': '2dd184b3669e049ba40563a7d423f95c',
        'info_dict': {
            'id': 'ir7qBEIKqvq',
            'ext': 'mp4',
            'title': 'Vine by sutiblr',
            'alt_title': 'Vine by sutiblr',
            'uploader': 'sutiblr',
            'uploader_id': '1198993975374495744',
            'upload_date': '20160220',
            'like_count': int,
            'comment_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
    }, {
        'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or',
        'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72',
        'info_dict': {
            'id': '-7LnUPGlSo',
            'ext': 'mp4',
            'title': 'Video by victoriassecret',
            'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
            'uploader_id': 'victoriassecret',
            'thumbnail': r're:^https?://.*\.jpg'
        },
        'add_ie': ['Instagram'],
    }]

    def _real_initialize(self):
        """Initialization hook: attempt the (optional) Tumblr login."""
        self._login()

    def _login(self):
        """Log in to Tumblr when credentials are available.

        Does nothing if no username is configured.  Raises ExtractorError
        (marked as expected) when the login page reports an error, and only
        emits a warning when the outcome cannot be determined.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the hidden inputs of the login page and add the
        # credentials on top of them.
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'user[email]': username,
            'user[password]': password
        })

        response, urlh = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(login_form), headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': self._LOGIN_URL,
            })

        # Successful login
        if '/dashboard' in urlh.geturl():
            return

        # Otherwise look for the error list embedded in the returned page;
        # the '[]' default keeps the JSON parse valid when none is present.
        login_errors = self._parse_json(
            self._search_regex(
                r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
                'login errors', default='[]'),
            None, fatal=False)
        if login_errors:
            raise ExtractorError(
                'Unable to login: %s' % login_errors[0], expected=True)

        self.report_warning('Login has probably failed')

    def _real_extract(self, url):
        """Extract the video of a Tumblr post.

        Returns an info dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration`` and ``formats`` for posts using Tumblr's
        own player, or a ``url_result`` delegating to the 'Generic'
        extractor when no Tumblr video iframe is found.  Raises
        ExtractorError (expected) when the request lands on the safe-mode
        page.
        """
        m_url = re.match(self._VALID_URL, url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        # Normalize to the canonical post URL (drops slug / "video" variant).
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage, urlh = self._download_webpage_handle(url, video_id)

        redirect_url = urlh.geturl()
        if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
            raise ExtractorError(
                'This Tumblr may contain sensitive media. '
                'Disable safe mode in your account settings '
                'at https://www.tumblr.com/settings/account#safe_mode',
                expected=True)

        iframe_url = self._search_regex(
            r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
            webpage, 'iframe url', default=None)
        if iframe_url is None:
            # No native Tumblr player: let the generic extractor look for
            # third-party embeds on the (possibly redirected) page.
            return self.url_result(redirect_url, 'Generic')

        iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')

        duration = None
        # (url, format_id) pairs; the list index doubles as the quality
        # rank below, so 'sd' must be appended before 'hd'.
        sources = []

        sd_url = self._search_regex(
            r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe,
            'sd video url', default=None, group='url')
        if sd_url:
            sources.append((sd_url, 'sd'))

        # Only parse the player options when the attribute is present: an
        # empty string is not valid JSON and would merely yield a
        # parse-failure warning.
        options_json = self._search_regex(
            r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe,
            'hd video url', default=None, group='options')
        options = None
        if options_json:
            options = self._parse_json(options_json, video_id, fatal=False)
        if options:
            duration = int_or_none(options.get('duration'))
            hd_url = options.get('hdUrl')
            if hd_url:
                sources.append((hd_url, 'hd'))

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'format_id': format_id,
            # Height is taken from a trailing 3-4 digit path component of
            # the media URL, when there is one.
            'height': int_or_none(self._search_regex(
                r'/(\d{3,4})$', video_url, 'height', default=None)),
            'quality': quality,
        } for quality, (video_url, format_id) in enumerate(sources)]

        self._sort_formats(formats)

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(
            r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
            webpage, 'title')

        return {
            'id': video_id,
            'title': video_title,
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
        }