]>
Commit | Line | Data |
---|---|---|
52ad14ae | 1 | # encoding: utf-8 |
214c22c7 | 2 | from __future__ import unicode_literals |
52ad14ae TT |
3 | |
4 | import re | |
a9bad429 | 5 | import json |
b2e8e7da | 6 | import datetime |
52ad14ae TT |
7 | |
8 | from .common import InfoExtractor | |
1cc79574 | 9 | from ..compat import ( |
15707c7e | 10 | compat_urllib_parse_urlencode, |
52ad14ae | 11 | compat_urlparse, |
1cc79574 PH |
12 | ) |
13 | from ..utils import ( | |
6110bbbf | 14 | ExtractorError, |
1cc79574 PH |
15 | int_or_none, |
16 | parse_duration, | |
bb865f3a | 17 | parse_iso8601, |
5c2266df | 18 | sanitized_Request, |
bb865f3a YCH |
19 | xpath_text, |
20 | determine_ext, | |
6e6bc8da | 21 | urlencode_postdata, |
52ad14ae TT |
22 | ) |
23 | ||
13ebea79 | 24 | |
class NiconicoIE(InfoExtractor):
    """Extractor for single nicovideo.jp watch pages.

    When login credentials are available the extractor authenticates during
    initialization and then queries the flapi ``getflv`` endpoint; otherwise
    it goes through the external ("thumb_watch") player to obtain the media
    URL.  Metadata comes primarily from the ``getthumbinfo`` XML document,
    with the watch page itself used as a fallback.
    """

    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'd1a75c0823e2f629128c43e1212760f9',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': 1385182762,
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
        },
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
            'ext': 'swf',
            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
            'duration': 209,
        },
    }, {
        # video exists but is marked as "deleted"
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'id': 'sm10000',
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'upload_date': '20071224',
            'timestamp': 1198527840,  # timestamp field has different value if logged in
            'duration': 304,
        },
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'id': '1388129933',
            'ext': 'mp4',
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
            'uploader_id': '312',
        }
    }]

    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'
    # Determine whether the downloader used authentication to download video
    _AUTHENTICATED = False

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Log in to nicovideo.jp with the configured credentials.

        Returns True when no username is configured (nothing to do) or when
        the login response does not contain the login-error heading; returns
        False, after emitting a warning, when it does.  On success the
        instance is flagged as authenticated.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        # Log in
        login_form_strs = {
            'mail': username,
            'password': password,
        }
        login_data = urlencode_postdata(login_form_strs)
        request = sanitized_Request(
            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, None, note='Logging in', errnote='Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        # Successful login; this sets an instance attribute that shadows the
        # class-level default of False
        self._AUTHENTICATED = True
        return True

    def _real_extract(self, url):
        """Extract the media URL and metadata for a single watch page.

        Returns an info dict with the keys 'id', 'url', 'title', 'ext',
        'format_id', 'thumbnail', 'description', 'uploader', 'timestamp',
        'uploader_id', 'view_count', 'comment_count', 'duration' and
        'webpage_url'.

        Raises ExtractorError when the flv info response carries no 'url'
        entry (flagged as expected when the response says 'deleted').
        """
        video_id = self._match_id(url)

        # Get video webpage. We are not actually interested in it for normal
        # cases, but need the cookies in order to be able to download the
        # info webpage
        webpage, handle = self._download_webpage_handle(
            'http://www.nicovideo.jp/watch/' + video_id, video_id)
        if video_id.startswith('so'):
            # For 'so' ids, re-derive the id from the URL the request
            # actually ended up at
            video_id = self._match_id(handle.geturl())

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note='Downloading video info page')

        if self._AUTHENTICATED:
            # Get flv info
            flv_info_webpage = self._download_webpage(
                'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
                video_id, 'Downloading flv info')
        else:
            # Get external player info
            ext_player_info = self._download_webpage(
                'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
            thumb_play_key = self._search_regex(
                r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')

            # Get flv info. The POST body is built with urlencode_postdata
            # (as in _login) so that it is bytes: urllib on Python 3 rejects
            # a text body.
            flv_info_data = urlencode_postdata({
                'k': thumb_play_key,
                'v': video_id,
            })
            flv_info_request = sanitized_Request(
                'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
                {'Content-Type': 'application/x-www-form-urlencoded'})
            flv_info_webpage = self._download_webpage(
                flv_info_request, video_id,
                note='Downloading flv info', errnote='Unable to download flv info')

        # The flv info response is a query-string style key=value document
        flv_info = compat_urlparse.parse_qs(flv_info_webpage)
        if 'url' not in flv_info:
            if 'deleted' in flv_info:
                raise ExtractorError('The video has been deleted.',
                                     expected=True)
            else:
                raise ExtractorError('Unable to find video URL')

        video_real_url = flv_info['url'][0]

        # Start extracting information
        title = xpath_text(video_info, './/title')
        if not title:
            title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
                webpage, 'video title')

        # Optional JSON blob embedded in the watch page; used below as the
        # last-resort source for several fields
        watch_api_data_string = self._html_search_regex(
            r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
            webpage, 'watch api data', default=None)
        watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
        video_detail = watch_api_data.get('videoDetail', {})

        extension = xpath_text(video_info, './/movie_type')
        if not extension:
            extension = determine_ext(video_real_url)

        thumbnail = (
            xpath_text(video_info, './/thumbnail_url') or
            self._html_search_meta('image', webpage, 'thumbnail', default=None) or
            video_detail.get('thumbnail'))

        description = xpath_text(video_info, './/description')

        timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
        if not timestamp:
            match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
            if match:
                # Insert ':00' ahead of the '+' offset marker before parsing
                # (presumably the meta value omits the seconds -- confirm)
                timestamp = parse_iso8601(match.replace('+', ':00+'))
        if not timestamp and video_detail.get('postedAt'):
            # postedAt uses '/' date separators and a space before the time;
            # it is parsed with a fixed UTC+9 offset
            timestamp = parse_iso8601(
                video_detail['postedAt'].replace('/', '-'),
                delimiter=' ', timezone=datetime.timedelta(hours=9))

        view_count = int_or_none(xpath_text(video_info, './/view_counter'))
        if not view_count:
            match = self._html_search_regex(
                r'>Views: <strong[^>]*>([^<]+)</strong>',
                webpage, 'view count', default=None)
            if match:
                view_count = int_or_none(match.replace(',', ''))
        view_count = view_count or video_detail.get('viewCount')

        comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
        if not comment_count:
            match = self._html_search_regex(
                r'>Comments: <strong[^>]*>([^<]+)</strong>',
                webpage, 'comment count', default=None)
            if match:
                comment_count = int_or_none(match.replace(',', ''))
        comment_count = comment_count or video_detail.get('commentCount')

        duration = (parse_duration(
            xpath_text(video_info, './/length') or
            self._html_search_meta(
                'video:duration', webpage, 'video duration', default=None)) or
            video_detail.get('length'))

        webpage_url = xpath_text(video_info, './/watch_url') or url

        # Prefer channel information over user information. The display
        # name is read with xpath_text so that a missing name element gives
        # None instead of raising AttributeError.
        channel_id_node = video_info.find('.//ch_id')
        user_id_node = video_info.find('.//user_id')
        if channel_id_node is not None:
            uploader_id = channel_id_node.text
            uploader = xpath_text(video_info, './/ch_name')
        elif user_id_node is not None:
            uploader_id = user_id_node.text
            uploader = xpath_text(video_info, './/user_nickname')
        else:
            uploader_id = uploader = None

        return {
            'id': video_id,
            'url': video_real_url,
            'title': title,
            'ext': extension,
            # URLs ending in 'low' are labelled as the economy format
            'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'comment_count': comment_count,
            'duration': duration,
            'webpage_url': webpage_url,
        }
a9bad429 JMF |
252 | |
253 | ||
class NiconicoPlaylistIE(InfoExtractor):
    """Extractor for nicovideo.jp "mylist" pages.

    Reads the JSON array passed to ``Mylist.preload(...)`` in the page and
    turns every item into a URL entry handled by NiconicoIE.
    """

    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
        },
        'playlist_mincount': 225,
    }

    def _real_extract(self, url):
        """Return a playlist info dict for the mylist page at *url*."""
        mylist_id = self._match_id(url)
        page = self._download_webpage(url, mylist_id)

        # The list contents are embedded in the page as a JSON array
        preload_json = self._search_regex(
            r'Mylist\.preload\(\d+, (\[.*\])\);', page, 'entries')

        playlist_entries = []
        for item in json.loads(preload_json):
            watch_url = (
                'http://www.nicovideo.jp/watch/%s' %
                item['item_data']['video_id'])
            playlist_entries.append({
                '_type': 'url',
                'ie_key': NiconicoIE.ie_key(),
                'url': watch_url,
            })

        playlist_title = self._search_regex(
            r'\s+name: "(.*?)"', page, 'title')

        return {
            '_type': 'playlist',
            'title': playlist_title,
            'id': mylist_id,
            'entries': playlist_entries,
        }