]>
Commit | Line | Data |
---|---|---|
52ad14ae | 1 | # encoding: utf-8 |
214c22c7 | 2 | from __future__ import unicode_literals |
52ad14ae TT |
3 | |
4 | import re | |
a9bad429 | 5 | import json |
52ad14ae TT |
6 | |
7 | from .common import InfoExtractor | |
1cc79574 | 8 | from ..compat import ( |
52ad14ae TT |
9 | compat_urllib_parse, |
10 | compat_urllib_request, | |
11 | compat_urlparse, | |
1cc79574 PH |
12 | ) |
13 | from ..utils import ( | |
6110bbbf | 14 | ExtractorError, |
1cc79574 PH |
15 | int_or_none, |
16 | parse_duration, | |
17 | unified_strdate, | |
52ad14ae TT |
18 | ) |
19 | ||
13ebea79 | 20 | |
class NiconicoIE(InfoExtractor):
    """Information extractor for single nicovideo.jp watch pages.

    If login credentials are available, the extractor logs in during
    initialization and fetches the media URL through the ``getflv`` API;
    otherwise it goes through the external player (``thumb_watch``)
    endpoint.  Metadata is read from the ``getthumbinfo`` XML document.
    """
    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    _TEST = {
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'd1a75c0823e2f629128c43e1212760f9',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
        },
        'params': {
            'username': 'ydl.niconico@gmail.com',
            'password': 'youtube-dl',
        },
    }

    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'
    # Determine whether the downloader used authentication to download video
    _AUTHENTICATED = False

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Log in with the configured credentials, if there are any.

        Returns True when no credentials are configured or the login
        succeeded, and False (after emitting a warning) when the site
        reported a login error.  On success ``self._AUTHENTICATED`` is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        # Log in
        login_form_strs = {
            'mail': username,
            'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, None, note='Logging in', errnote='Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        # Successful login
        self._AUTHENTICATED = True
        return True

    @staticmethod
    def _xml_text(doc, tag):
        """Return the text of the first ``tag`` descendant of ``doc``, or None if absent."""
        node = doc.find('.//' + tag)
        return node.text if node is not None else None

    def _real_extract(self, url):
        """Extract the media URL and metadata for the video at ``url``.

        Returns an info dict with the keys built at the end of this method.

        Raises ExtractorError when the flv info marks the video as deleted,
        when the flv info carries no ``url`` entry, or when the video info
        document has no title.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # Get video webpage. We are not actually interested in it, but need
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note='Downloading video info page')

        if self._AUTHENTICATED:
            # Get flv info
            flv_info_webpage = self._download_webpage(
                'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
                video_id, 'Downloading flv info')
        else:
            # Get external player info
            ext_player_info = self._download_webpage(
                'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
            thumb_play_key = self._search_regex(
                r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')

            # Get flv info
            # The POST body is encoded to bytes, as in _login(): Python 3's
            # urllib.request refuses str request data.
            flv_info_data = compat_urllib_parse.urlencode({
                'k': thumb_play_key,
                'v': video_id
            }).encode('utf-8')
            flv_info_request = compat_urllib_request.Request(
                'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
                {'Content-Type': 'application/x-www-form-urlencoded'})
            flv_info_webpage = self._download_webpage(
                flv_info_request, video_id,
                note='Downloading flv info', errnote='Unable to download flv info')

        if 'deleted=' in flv_info_webpage:
            raise ExtractorError('The video has been deleted.',
                                 expected=True)
        flv_info = compat_urlparse.parse_qs(flv_info_webpage)
        if not flv_info.get('url'):
            # Report a readable extractor error instead of a bare KeyError
            raise ExtractorError('Unable to find video URL in flv info')
        video_real_url = flv_info['url'][0]

        # Start extracting information.  Elements missing from the info
        # document yield None rather than crashing with AttributeError;
        # only the title is treated as mandatory.
        title = self._xml_text(video_info, 'title')
        if title is None:
            raise ExtractorError('Unable to extract video title from video info')
        extension = self._xml_text(video_info, 'movie_type')
        video_format = extension.upper() if extension is not None else None
        thumbnail = self._xml_text(video_info, 'thumbnail_url')
        description = self._xml_text(video_info, 'description')
        first_retrieve = self._xml_text(video_info, 'first_retrieve')
        # Drop the UTC offset suffix before handing the date to unified_strdate
        upload_date = unified_strdate(first_retrieve.split('+')[0]) if first_retrieve else None
        view_count = int_or_none(self._xml_text(video_info, 'view_counter'))
        comment_count = int_or_none(self._xml_text(video_info, 'comment_num'))
        length = self._xml_text(video_info, 'length')
        duration = parse_duration(length) if length else None
        webpage_url = self._xml_text(video_info, 'watch_url')

        # Channel information takes precedence over user information
        if video_info.find('.//ch_id') is not None:
            uploader_id = self._xml_text(video_info, 'ch_id')
            uploader = self._xml_text(video_info, 'ch_name')
        elif video_info.find('.//user_id') is not None:
            uploader_id = self._xml_text(video_info, 'user_id')
            uploader = self._xml_text(video_info, 'user_nickname')
        else:
            uploader_id = uploader = None

        return {
            'id': video_id,
            'url': video_real_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'comment_count': comment_count,
            'duration': duration,
            'webpage_url': webpage_url,
        }
a9bad429 JMF |
156 | |
157 | ||
class NiconicoPlaylistIE(InfoExtractor):
    """Information extractor for nicovideo.jp "mylist" pages.

    The list items are read from the JSON array passed to
    ``Mylist.preload(...)`` in the page source; each item is returned as a
    URL entry to be handled by NiconicoIE.
    """
    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
        },
        'playlist_mincount': 225,
    }

    def _real_extract(self, url):
        """Return a playlist info dict for the mylist page at ``url``."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)

        # The embedded preload call carries the full item list as JSON
        preload_json = self._search_regex(
            r'Mylist\.preload\(\d+, (\[.*\])\);', webpage, 'entries')

        entries = []
        for item in json.loads(preload_json):
            watch_url = 'http://www.nicovideo.jp/watch/%s' % item['item_data']['video_id']
            entries.append({
                '_type': 'url',
                'ie_key': NiconicoIE.ie_key(),
                'url': watch_url,
            })

        list_title = self._search_regex(r'\s+name: "(.*?)"', webpage, 'title')
        return {
            '_type': 'playlist',
            'title': list_title,
            'id': list_id,
            'entries': entries,
        }