]>
Commit | Line | Data |
---|---|---|
52ad14ae | 1 | # encoding: utf-8 |
214c22c7 | 2 | from __future__ import unicode_literals |
52ad14ae TT |
3 | |
4 | import re | |
a9bad429 | 5 | import json |
b2e8e7da | 6 | import datetime |
52ad14ae TT |
7 | |
8 | from .common import InfoExtractor | |
1cc79574 | 9 | from ..compat import ( |
52ad14ae TT |
10 | compat_urllib_parse, |
11 | compat_urllib_request, | |
12 | compat_urlparse, | |
1cc79574 PH |
13 | ) |
14 | from ..utils import ( | |
6110bbbf | 15 | ExtractorError, |
1cc79574 PH |
16 | int_or_none, |
17 | parse_duration, | |
bb865f3a YCH |
18 | parse_iso8601, |
19 | xpath_text, | |
20 | determine_ext, | |
52ad14ae TT |
21 | ) |
22 | ||
13ebea79 | 23 | |
class NiconicoIE(InfoExtractor):
    """Extractor for single niconico videos (``nicovideo.jp/watch/<id>``).

    If credentials are configured, the extractor logs in during
    initialization and afterwards queries the ``getflv`` API; otherwise it
    goes through the external player (``thumb_watch``) endpoints.
    """
    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'd1a75c0823e2f629128c43e1212760f9',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': 1385182762,
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
        },
    }, {
        # Files downloaded with and without credentials differ, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
            'ext': 'swf',
            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
            'duration': 209,
        },
    }, {
        # video exists but is marked as "deleted"
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'id': 'sm10000',
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'upload_date': '20071224',
            'timestamp': 1198527840,  # timestamp field has different value if logged in
            'duration': 304,
        },
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'id': '1388129933',
            'ext': 'mp4',
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
            'uploader_id': '312',
        }
    }]

    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'
    # Determine whether the downloader used authentication to download video
    _AUTHENTICATED = False

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Log in with the configured credentials, if there are any.

        Returns True when no credentials are configured or when the login
        succeeded, and False (after emitting a warning) when the site
        reports a login error. On success ``self._AUTHENTICATED`` is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        # Log in
        login_form_strs = {
            'mail': username,
            'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, None, note='Logging in', errnote='Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        # Successful login
        self._AUTHENTICATED = True
        return True

    def _real_extract(self, url):
        """Extract the media URL and metadata for a single video.

        Metadata is taken from the ``getthumbinfo`` XML first, falling back
        to the watch page (meta tags, visible counters and the embedded
        ``watchAPIDataContainer`` JSON) when a field is missing there.

        Raises ExtractorError when the flv info carries no ``url`` entry;
        the error is marked as expected if the video is flagged as deleted.
        """
        video_id = self._match_id(url)

        # Get video webpage. We are not actually interested in it for normal
        # cases, but need the cookies in order to be able to download the
        # info webpage
        webpage, handle = self._download_webpage_handle(
            'http://www.nicovideo.jp/watch/' + video_id, video_id)
        if video_id.startswith('so'):
            # Re-read the id from the URL the request finally ended up at
            video_id = self._match_id(handle.geturl())

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note='Downloading video info page')

        if self._AUTHENTICATED:
            # Get flv info
            flv_info_webpage = self._download_webpage(
                'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
                video_id, 'Downloading flv info')
        else:
            # Get external player info
            ext_player_info = self._download_webpage(
                'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
            thumb_play_key = self._search_regex(
                r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')

            # Get flv info. The POST body is encoded to bytes, exactly as
            # _login() does for its form data: Python 3's urllib refuses
            # text POST data.
            flv_info_data = compat_urllib_parse.urlencode({
                'k': thumb_play_key,
                'v': video_id
            }).encode('utf-8')
            flv_info_request = compat_urllib_request.Request(
                'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
                {'Content-Type': 'application/x-www-form-urlencoded'})
            flv_info_webpage = self._download_webpage(
                flv_info_request, video_id,
                note='Downloading flv info', errnote='Unable to download flv info')

        # The flv info response is a URL-encoded query string
        flv_info = compat_urlparse.parse_qs(flv_info_webpage)
        if 'url' not in flv_info:
            if 'deleted' in flv_info:
                raise ExtractorError('The video has been deleted.',
                                     expected=True)
            else:
                raise ExtractorError('Unable to find video URL')

        video_real_url = flv_info['url'][0]

        # Start extracting information
        title = xpath_text(video_info, './/title')
        if not title:
            title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
                webpage, 'video title')

        # JSON blob embedded in the watch page; used as a last-resort
        # source for several fields below
        watch_api_data_string = self._html_search_regex(
            r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
            webpage, 'watch api data', default=None)
        watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
        video_detail = watch_api_data.get('videoDetail', {})

        extension = xpath_text(video_info, './/movie_type')
        if not extension:
            extension = determine_ext(video_real_url)

        thumbnail = (
            xpath_text(video_info, './/thumbnail_url') or
            self._html_search_meta('image', webpage, 'thumbnail', default=None) or
            video_detail.get('thumbnail'))

        description = xpath_text(video_info, './/description')

        timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
        if not timestamp:
            match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
            if match:
                # Insert ':00' in front of the UTC offset before parsing
                timestamp = parse_iso8601(match.replace('+', ':00+'))
        if not timestamp and video_detail.get('postedAt'):
            # 'postedAt' uses '/' date separators and is parsed with a
            # fixed +09:00 offset
            timestamp = parse_iso8601(
                video_detail['postedAt'].replace('/', '-'),
                delimiter=' ', timezone=datetime.timedelta(hours=9))

        view_count = int_or_none(xpath_text(video_info, './/view_counter'))
        if not view_count:
            match = self._html_search_regex(
                r'>Views: <strong[^>]*>([^<]+)</strong>',
                webpage, 'view count', default=None)
            if match:
                view_count = int_or_none(match.replace(',', ''))
        view_count = view_count or video_detail.get('viewCount')

        comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
        if not comment_count:
            match = self._html_search_regex(
                r'>Comments: <strong[^>]*>([^<]+)</strong>',
                webpage, 'comment count', default=None)
            if match:
                comment_count = int_or_none(match.replace(',', ''))
        comment_count = comment_count or video_detail.get('commentCount')

        duration = (parse_duration(
            xpath_text(video_info, './/length') or
            self._html_search_meta(
                'video:duration', webpage, 'video duration', default=None)) or
            video_detail.get('length'))

        webpage_url = xpath_text(video_info, './/watch_url') or url

        # Channel uploads carry ch_id/ch_name, user uploads carry
        # user_id/user_nickname. xpath_text() is used so that a missing
        # name element yields None instead of an AttributeError.
        if video_info.find('.//ch_id') is not None:
            uploader_id = xpath_text(video_info, './/ch_id')
            uploader = xpath_text(video_info, './/ch_name')
        elif video_info.find('.//user_id') is not None:
            uploader_id = xpath_text(video_info, './/user_id')
            uploader = xpath_text(video_info, './/user_nickname')
        else:
            uploader_id = uploader = None

        return {
            'id': video_id,
            'url': video_real_url,
            'title': title,
            'ext': extension,
            'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'comment_count': comment_count,
            'duration': duration,
            'webpage_url': webpage_url,
        }
a9bad429 JMF |
254 | |
255 | ||
class NiconicoPlaylistIE(InfoExtractor):
    """Extractor for niconico "mylist" pages (``nicovideo.jp/mylist/<id>``)."""
    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
        },
        'playlist_mincount': 225,
    }

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per mylist item."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)

        # The items are embedded in the page as the JSON array argument of
        # a Mylist.preload(...) call
        preload_json = self._search_regex(
            r'Mylist\.preload\(\d+, (\[.*\])\);', webpage, 'entries')

        # Each item is handed over to NiconicoIE through its watch URL
        video_ie_key = NiconicoIE.ie_key()
        entries = []
        for item in json.loads(preload_json):
            watch_url = 'http://www.nicovideo.jp/watch/%s' % item['item_data']['video_id']
            entries.append({
                '_type': 'url',
                'ie_key': video_ie_key,
                'url': watch_url,
            })

        playlist_title = self._search_regex(r'\s+name: "(.*?)"', webpage, 'title')

        return {
            '_type': 'playlist',
            'title': playlist_title,
            'id': list_id,
            'entries': entries,
        }