]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
214c22c7 | 2 | from __future__ import unicode_literals |
52ad14ae TT |
3 | |
4 | import re | |
a9bad429 | 5 | import json |
b2e8e7da | 6 | import datetime |
52ad14ae TT |
7 | |
8 | from .common import InfoExtractor | |
1cc79574 | 9 | from ..compat import ( |
52ad14ae | 10 | compat_urlparse, |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
6110bbbf | 13 | ExtractorError, |
1cc79574 PH |
14 | int_or_none, |
15 | parse_duration, | |
bb865f3a | 16 | parse_iso8601, |
5c2266df | 17 | sanitized_Request, |
bb865f3a YCH |
18 | xpath_text, |
19 | determine_ext, | |
6e6bc8da | 20 | urlencode_postdata, |
52ad14ae TT |
21 | ) |
22 | ||
13ebea79 | 23 | |
class NiconicoIE(InfoExtractor):
    """Extractor for single niconico watch pages (nicovideo.jp/watch/<id>).

    Metadata is taken primarily from the getthumbinfo XML, with the watch
    page HTML and its embedded watchAPIDataContainer JSON used as fallbacks.
    The media URL comes from the getflv API response.
    """
    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'd1a75c0823e2f629128c43e1212760f9',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': 1385182762,
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
        },
        'skip': 'Requires an account',
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
            'ext': 'swf',
            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
            'duration': 209,
        },
        'skip': 'Requires an account',
    }, {
        # 'video exists but is marked as "deleted"
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'id': 'sm10000',
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'upload_date': '20071224',
            'timestamp': int,  # timestamp field has different value if logged in
            'duration': 304,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'id': '1388129933',
            'ext': 'mp4',
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
            'uploader_id': '312',
        },
        'skip': 'The viewing period of the video you were searching for has expired.',
    }]

    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'

    def _real_initialize(self):
        """Attempt to log in before any extraction is performed."""
        # The boolean result of _login() is intentionally not inspected here;
        # a failed login has already been reported as a warning.
        self._login()

    def _login(self):
        """Log in with the configured credentials, if there are any.

        Returns True when no username is configured or when the login
        response does not contain the "Log in error" heading; returns False
        (after emitting a warning) when it does.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        # Log in
        login_data = urlencode_postdata({
            'mail': username,
            'password': password,
        })
        request = sanitized_Request(
            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, None, note='Logging in', errnote='Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results):
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_extract(self, url):
        """Build the info dict for a single watch page URL.

        Raises ExtractorError when the getflv response carries no 'url'
        field (video deleted, login required, or unknown reason).
        """
        video_id = self._match_id(url)

        # Get video webpage. We are not actually interested in it for normal
        # cases, but need the cookies in order to be able to download the
        # info webpage
        webpage, handle = self._download_webpage_handle(
            'http://www.nicovideo.jp/watch/' + video_id, video_id)
        if video_id.startswith('so'):
            # For 'so' ids the id is re-read from the URL the request
            # actually ended up at.
            video_id = self._match_id(handle.geturl())

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note='Downloading video info page')

        # Get flv info
        flv_info_webpage = self._download_webpage(
            'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
            video_id, 'Downloading flv info')

        # The getflv response is parsed as a query string.
        flv_info = compat_urlparse.parse_qs(flv_info_webpage)
        if 'url' not in flv_info:
            if 'deleted' in flv_info:
                raise ExtractorError('The video has been deleted.',
                                     expected=True)
            elif 'closed' in flv_info:
                raise ExtractorError('Niconico videos now require logging in',
                                     expected=True)
            else:
                raise ExtractorError('Unable to find video URL')

        video_real_url = flv_info['url'][0]

        # Start extracting information
        title = xpath_text(video_info, './/title')
        if not title:
            title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
                webpage, 'video title')

        # JSON blob embedded in the watch page; used only as a last-resort
        # source for the fields below.
        watch_api_data_string = self._html_search_regex(
            r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
            webpage, 'watch api data', default=None)
        watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
        video_detail = watch_api_data.get('videoDetail', {})

        extension = xpath_text(video_info, './/movie_type')
        if not extension:
            extension = determine_ext(video_real_url)

        thumbnail = (
            xpath_text(video_info, './/thumbnail_url') or
            self._html_search_meta('image', webpage, 'thumbnail', default=None) or
            video_detail.get('thumbnail'))

        description = xpath_text(video_info, './/description')

        timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
        if not timestamp:
            match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
            if match:
                # Insert ':00' ahead of the '+' offset before parsing
                # (presumably the page value has no seconds field - confirm).
                timestamp = parse_iso8601(match.replace('+', ':00+'))
        if not timestamp and video_detail.get('postedAt'):
            # postedAt uses '/' date separators and a space delimiter; it is
            # interpreted with a fixed +9h offset.
            timestamp = parse_iso8601(
                video_detail['postedAt'].replace('/', '-'),
                delimiter=' ', timezone=datetime.timedelta(hours=9))

        view_count = int_or_none(xpath_text(video_info, './/view_counter'))
        if not view_count:
            match = self._html_search_regex(
                r'>Views: <strong[^>]*>([^<]+)</strong>',
                webpage, 'view count', default=None)
            if match:
                view_count = int_or_none(match.replace(',', ''))
        view_count = view_count or video_detail.get('viewCount')

        comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
        if not comment_count:
            match = self._html_search_regex(
                r'>Comments: <strong[^>]*>([^<]+)</strong>',
                webpage, 'comment count', default=None)
            if match:
                comment_count = int_or_none(match.replace(',', ''))
        comment_count = comment_count or video_detail.get('commentCount')

        duration = (parse_duration(
            xpath_text(video_info, './/length') or
            self._html_search_meta(
                'video:duration', webpage, 'video duration', default=None)) or
            video_detail.get('length'))

        webpage_url = xpath_text(video_info, './/watch_url') or url

        # ch_id/ch_name take precedence over user_id/user_nickname. The
        # values are read with xpath_text rather than find(...).text so that
        # a response carrying the id element but no name element yields
        # uploader=None instead of an AttributeError on None.text.
        if video_info.find('.//ch_id') is not None:
            uploader_id = xpath_text(video_info, './/ch_id')
            uploader = xpath_text(video_info, './/ch_name')
        elif video_info.find('.//user_id') is not None:
            uploader_id = xpath_text(video_info, './/user_id')
            uploader = xpath_text(video_info, './/user_nickname')
        else:
            uploader_id = uploader = None

        return {
            'id': video_id,
            'url': video_real_url,
            'title': title,
            'ext': extension,
            # URLs ending in 'low' are labelled as the 'economy' format.
            'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'comment_count': comment_count,
            'duration': duration,
            'webpage_url': webpage_url,
        }
a9bad429 JMF |
235 | |
236 | ||
class NiconicoPlaylistIE(InfoExtractor):
    """Extractor for niconico "mylist" pages (nicovideo.jp/mylist/<id>).

    Each list item is returned as a URL entry delegated to NiconicoIE.
    """
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
        },
        'playlist_mincount': 225,
    }

    def _real_extract(self, url):
        """Return a playlist dict built from the page's Mylist.preload data."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)

        # The item list is embedded in the page as the second argument of a
        # Mylist.preload(<number>, [...]) call.
        entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
                                          webpage, 'entries')
        # Decode with the extractor's own JSON helper (as NiconicoIE does for
        # its embedded JSON) rather than bare json.loads, so malformed data
        # is reported through the extractor's error handling and tagged with
        # the list id.
        preloaded_items = self._parse_json(entries_json, list_id)
        entries = [{
            '_type': 'url',
            'ie_key': NiconicoIE.ie_key(),
            'url': ('http://www.nicovideo.jp/watch/%s' %
                    item['item_data']['video_id']),
        } for item in preloaded_items]

        return {
            '_type': 'playlist',
            'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
            'id': list_id,
            'entries': entries,
        }