]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
c060b774 PH |
2 | from __future__ import unicode_literals |
3 | ||
ae287755 PH |
4 | import re |
5 | ||
6 | from .common import InfoExtractor | |
97b01144 | 7 | from ..compat import compat_str |
c678192a ZF |
8 | from ..utils import ( |
9 | ExtractorError, | |
10 | int_or_none, | |
c678192a ZF |
11 | urlencode_postdata |
12 | ) | |
ae287755 PH |
13 | |
14 | ||
class TumblrIE(InfoExtractor):
    """Extractor for videos on ``<blog>.tumblr.com`` post and video pages.

    Videos served through the Tumblr video iframe are extracted directly
    (an SD source plus an optional HD source).  Posts that do not contain
    that iframe are delegated to the generic extractor; the tests below
    cover Vidme, Vimeo, Vine and Instagram embeds reached that way.
    """

    # Captures the blog subdomain and the numeric post id.
    _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
    # NOTE(review): presumably the machine name looked up by
    # _get_login_info() in .netrc -- confirm against InfoExtractor.
    _NETRC_MACHINE = 'tumblr'
    _LOGIN_URL = 'https://www.tumblr.com/login'
    _TESTS = [{
        'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
        'md5': '479bb068e5b16462f5176a6828829767',
        'info_dict': {
            'id': '54196191430',
            'ext': 'mp4',
            'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
            'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
        'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
        'info_dict': {
            'id': '90208453769',
            'ext': 'mp4',
            'title': '5SOS STRUM ;]',
            'description': 'md5:dba62ac8639482759c8eb10ce474586a',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video',
        'md5': '7ae503065ad150122dc3089f8cf1546c',
        'info_dict': {
            'id': '130323439814',
            'ext': 'mp4',
            'title': 'HD Video Testing \u2014 Test description for my HD video',
            'description': 'md5:97cc3ab5fcd27ee4af6356701541319c',
            'thumbnail': r're:http://.*\.jpg',
        },
        'params': {
            'format': 'hd',
        },
    }, {
        'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
        'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
        'info_dict': {
            'id': 'Wmur',
            'ext': 'mp4',
            'title': 'naked smoking & stretching',
            'upload_date': '20150506',
            'timestamp': 1430931613,
            'age_limit': 18,
            'uploader_id': '1638622',
            'uploader': 'naked-yogi',
        },
        'add_ie': ['Vidme'],
    }, {
        'url': 'http://camdamage.tumblr.com/post/98846056295/',
        'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
        'info_dict': {
            'id': '105463834',
            'ext': 'mp4',
            'title': 'Cam Damage-HD 720p',
            'uploader': 'John Moyer',
            'uploader_id': 'user32021558',
        },
        'add_ie': ['Vimeo'],
    }, {
        'url': 'http://sutiblr.tumblr.com/post/139638707273',
        'md5': '2dd184b3669e049ba40563a7d423f95c',
        'info_dict': {
            'id': 'ir7qBEIKqvq',
            'ext': 'mp4',
            'title': 'Vine by sutiblr',
            'alt_title': 'Vine by sutiblr',
            'uploader': 'sutiblr',
            'uploader_id': '1198993975374495744',
            'upload_date': '20160220',
            'like_count': int,
            'comment_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
    }, {
        'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or',
        'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72',
        'info_dict': {
            'id': '-7LnUPGlSo',
            'ext': 'mp4',
            'title': 'Video by victoriassecret',
            'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
            'uploader_id': 'victoriassecret',
            'thumbnail': r're:^https?://.*\.jpg'
        },
        'add_ie': ['Instagram'],
    }]

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Sign in to Tumblr when login credentials are available.

        Does nothing if ``_get_login_info()`` yields no username.  The
        login is considered successful when the POST ends up on a URL
        containing ``/dashboard``.

        Raises:
            ExtractorError: if the response embeds a non-empty
                ``RegistrationForm.errors`` list; the first entry is
                included in the message.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        # Seed the form with the hidden inputs of the login page, then add
        # the credentials on top.
        form_fields = self._hidden_inputs(self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page'))
        form_fields['user[email]'] = username
        form_fields['user[password]'] = password

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': self._LOGIN_URL,
        }
        response, urlh = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form_fields), headers=request_headers)

        # Successful login
        if '/dashboard' in urlh.geturl():
            return

        errors_json = self._search_regex(
            r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
            'login errors', default='[]')
        login_errors = self._parse_json(errors_json, None, fatal=False)
        if login_errors:
            raise ExtractorError(
                'Unable to login: %s' % login_errors[0], expected=True)

        # No dashboard redirect and no reported error: outcome is unclear.
        self.report_warning('Login has probably failed')

    def _real_extract(self, url):
        """Extract the video of a Tumblr post.

        Args:
            url: a URL matching ``_VALID_URL``.

        Returns:
            An info dict with ``id``, ``title``, ``description``,
            ``thumbnail``, ``duration`` and ``formats``; or, when the post
            has no Tumblr video iframe, a ``url_result`` pointing the
            generic extractor at the final (post-redirect) URL.

        Raises:
            ExtractorError: if the request is redirected to Tumblr's
                safe-mode page.
        """
        m_url = re.match(self._VALID_URL, url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        # Always fetch the canonical post URL, whatever form was given.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage, urlh = self._download_webpage_handle(url, video_id)

        redirect_url = compat_str(urlh.geturl())
        if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
            raise ExtractorError(
                'This Tumblr may contain sensitive media. '
                'Disable safe mode in your account settings '
                'at https://www.tumblr.com/settings/account#safe_mode',
                expected=True)

        iframe_url = self._search_regex(
            r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
            webpage, 'iframe url', default=None)
        if iframe_url is None:
            # Not a natively hosted video: let the generic extractor look
            # for third-party embeds.
            return self.url_result(redirect_url, 'Generic')

        iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')

        duration = None
        # (url, format_id) pairs in ascending quality order.
        sources = []

        sd_url = self._search_regex(
            r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe,
            'sd video url', default=None, group='url')
        if sd_url:
            sources.append((sd_url, 'sd'))

        options_json = self._search_regex(
            r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe,
            'hd video url', default=None, group='options')
        # Only parse when the attribute exists: feeding an empty string to
        # _parse_json(fatal=False) would emit a misleading JSON warning for
        # every iframe that simply has no HD options.
        options = None
        if options_json:
            options = self._parse_json(options_json, video_id, fatal=False)
        if isinstance(options, dict):
            duration = int_or_none(options.get('duration'))
            hd_url = options.get('hdUrl')
            if hd_url:
                sources.append((hd_url, 'hd'))

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'format_id': format_id,
            # Height is taken from a trailing 3-4 digit path component,
            # when the URL has one.
            'height': int_or_none(self._search_regex(
                r'/(\d{3,4})$', video_url, 'height', default=None)),
            'quality': quality,
        } for quality, (video_url, format_id) in enumerate(sources)]

        self._sort_formats(formats)

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(
            r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
            webpage, 'title')

        return {
            'id': video_id,
            'title': video_title,
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
        }