]>
Commit | Line | Data |
---|---|---|
24a267b5 JMF |
1 | # coding: utf-8 |
2 | ||
8bdfddf6 PH |
3 | from __future__ import unicode_literals |
4 | ||
9caa687d | 5 | from .common import InfoExtractor |
b264c213 | 6 | from ..compat import compat_str |
40cf7fcb | 7 | from ..utils import ( |
5b012dfc | 8 | ExtractorError, |
40cf7fcb | 9 | int_or_none, |
664bcd80 | 10 | InAdvancePagedList, |
40cf7fcb | 11 | float_or_none, |
12 | unescapeHTML, | |
13 | ) | |
9caa687d YK |
14 | |
15 | ||
class TudouIE(InfoExtractor):
    """Extractor for single Tudou video pages matched by ``_VALID_URL``.

    Item metadata is fetched from the ``getItemInfo.action`` endpoint.
    Items carrying a Youku ``vcode`` are delegated to the Youku extractor;
    otherwise every segment of the highest numeric quality is resolved and
    returned as one ``multi_video`` result.
    """

    IE_NAME = 'tudou'
    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})'
    _TESTS = [{
        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
        'md5': '140a49ed444bd22f93330985d8475fcb',
        'info_dict': {
            'id': '159448201',
            'ext': 'f4v',
            'title': '卡马乔国足开大脚长传冲吊集锦',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1372113489000,
            'description': '卡马乔卡家军,开大脚先进战术不完全集锦!',
            'duration': 289.04,
            'view_count': int,
            'filesize': int,
        }
    }, {
        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
        'info_dict': {
            'id': '117049447',
            'ext': 'f4v',
            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1349207518000,
            'description': 'md5:294612423894260f2dcd5c6c04fe248b',
            'duration': 5478.33,
            'view_count': int,
            'filesize': int,
        }
    }]

    # Sent as the Referer header with every segment download.
    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'

    # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
    # 0001, 0002 and 4001 are not included as they indicate temporary issues
    TVC_ERRORS = {
        '0003': 'The video is deleted or does not exist',
        '1001': 'This video is unavailable due to licensing issues',
        '1002': 'This video is unavailable as it\'s under review',
        '1003': 'This video is unavailable as it\'s under review',
        '3001': 'Password required',
        '5001': 'This video is available in Mainland China only due to licensing issues',
        '7001': 'This video is unavailable',
        '8001': 'This video is unavailable due to licensing issues',
    }

    def _url_for_id(self, video_id, quality=None):
        """Return the text of the info XML document for one segment.

        video_id -- segment id; converted with compat_str for the query
        quality  -- optional quality key appended to the request URL

        Raises ExtractorError (expected) when the XML root carries an
        ``error`` attribute.
        """
        info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
        if quality:
            # NOTE(review): there is no '=' between 'hd' and the value;
            # confirm this is the parameter form the endpoint expects.
            info_url += '&hd' + quality
        xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
        error = xml_data.attrib.get('error')
        if error is not None:
            raise ExtractorError('Tudou said: %s' % error, expected=True)
        final_url = xml_data.text
        return final_url

    def _real_extract(self, url):
        """Extract the video referenced by ``url``.

        Returns a ``url_result`` pointing at the Youku extractor when the
        item has a ``vcode``; otherwise a ``multi_video`` dict whose entries
        are the segments of the highest numeric quality.

        Raises ExtractorError when the item has no segments (using the
        message from TVC_ERRORS when the ``tvcCode`` is known) or when no
        numeric quality key is present in the segment data.
        """
        video_id = self._match_id(url)
        item_data = self._download_json(
            'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id)

        youku_vcode = item_data.get('vcode')
        if youku_vcode:
            return self.url_result('youku:' + youku_vcode, ie='Youku')

        if not item_data.get('itemSegs'):
            tvc_code = item_data.get('tvcCode')
            if tvc_code:
                err_msg = self.TVC_ERRORS.get(tvc_code)
                if err_msg:
                    raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
                raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
            raise ExtractorError('Unexpected error returned from Tudou')

        title = unescapeHTML(item_data['kw'])
        description = item_data.get('desc')
        thumbnail_url = item_data.get('pic')
        view_count = int_or_none(item_data.get('playTimes'))
        timestamp = int_or_none(item_data.get('pt'))

        # 'itemSegs' is itself a JSON document embedded as a string.
        segments = self._parse_json(item_data['itemSegs'], video_id)
        # It looks like the keys are the arguments that have to be passed as
        # the hd field in the request url, we pick the higher
        # Also, filter non-number qualities (see issue #3643).
        numeric_qualities = [k for k in segments if k.isdigit()]
        if not numeric_qualities:
            # Without this guard the [-1] below fails with a bare IndexError.
            raise ExtractorError('No numeric quality found in Tudou segment data')
        quality = sorted(numeric_qualities, key=int)[-1]
        parts = segments[quality]
        len_parts = len(parts)
        if len_parts > 1:
            self.to_screen('%s: found %s parts' % (video_id, len_parts))

        def part_func(partnum):
            """Return a one-element list holding the info dict of one part."""
            part = parts[partnum]
            part_id = part['k']
            final_url = self._url_for_id(part_id, quality)
            # Extension is whatever follows the last dot before the query.
            ext = (final_url.split('?')[0]).split('.')[-1]
            return [{
                'id': '%s' % part_id,
                'url': final_url,
                'ext': ext,
                'title': title,
                'thumbnail': thumbnail_url,
                'description': description,
                'view_count': view_count,
                'timestamp': timestamp,
                # presumably 'seconds' is in milliseconds, hence 1000 -- TODO confirm
                'duration': float_or_none(part.get('seconds'), 1000),
                'filesize': int_or_none(part.get('size')),
                'http_headers': {
                    'Referer': self._PLAYER_URL,
                },
            }]

        # One page per part, one entry per page.
        entries = InAdvancePagedList(part_func, len_parts, 1)

        return {
            '_type': 'multi_video',
            'entries': entries,
            'id': video_id,
            'title': title,
        }
40cf7fcb | 138 | |
139 | ||
class TudouPlaylistIE(InfoExtractor):
    """Extractor for Tudou ``listplay`` pages that name only a playlist."""

    IE_NAME = 'tudou:playlist'
    _VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html'
    _TESTS = [{
        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html',
        'info_dict': {
            'id': 'zzdE77v6Mmo',
        },
        'playlist_mincount': 209,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per listed item."""
        list_code = self._match_id(url)
        listing = self._download_json(
            'http://www.tudou.com/tvp/plist.action?lcode=%s' % list_code,
            list_code)

        videos = []
        for video in listing['items']:
            # Each item is handed over to the 'Tudou' extractor by page URL.
            page_url = 'http://www.tudou.com/programs/view/%s' % video['icode']
            videos.append(self.url_result(
                page_url, 'Tudou', video['icode'], video['kw']))

        return self.playlist_result(videos, list_code)
160 | ||
161 | ||
class TudouAlbumIE(InfoExtractor):
    """Extractor for Tudou ``albumcover`` / ``albumplay`` album pages."""

    IE_NAME = 'tudou:album'
    _VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})'
    _TESTS = [{
        'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html',
        'info_dict': {
            'id': 'v5qckFJvNJg',
        },
        'playlist_mincount': 45,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per album item."""
        album_code = self._match_id(url)
        listing = self._download_json(
            'http://www.tudou.com/tvp/alist.action?acode=%s' % album_code,
            album_code)

        videos = []
        for video in listing['items']:
            # Each item is handed over to the 'Tudou' extractor by page URL.
            page_url = 'http://www.tudou.com/programs/view/%s' % video['icode']
            videos.append(self.url_result(
                page_url, 'Tudou', video['icode'], video['kw']))

        return self.playlist_result(videos, album_code)
181 | return self.playlist_result(entries, album_id) |