]>
Commit | Line | Data |
---|---|---|
24a267b5 JMF |
1 | # coding: utf-8 |
2 | ||
8bdfddf6 PH |
3 | from __future__ import unicode_literals |
4 | ||
9caa687d | 5 | import re |
24a267b5 | 6 | import json |
9caa687d YK |
7 | |
8 | from .common import InfoExtractor | |
9 | ||
10 | ||
class TudouIE(InfoExtractor):
    """Information extractor for tudou.com video pages.

    Handles the ``listplay``, ``programs`` (optionally ``programs/view``)
    and ``albumplay`` URL forms matched by ``_VALID_URL``.  Pages that embed
    a ``vcode`` are handed over to the Youku extractor; all other pages are
    resolved into a ``multi_video`` result with one entry per segment.
    """

    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
        'md5': '140a49ed444bd22f93330985d8475fcb',
        'info_dict': {
            'id': '159448201',
            'ext': 'f4v',
            'title': '卡马乔国足开大脚长传冲吊集锦',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
        'info_dict': {
            'id': '117049447',
            'ext': 'f4v',
            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }]

    # Fallback used as the Referer header when the page declares no playerUrl.
    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'

    def _url_for_id(self, id, quality=None):
        """Return the media URL advertised for one segment.

        Downloads ``http://v2.tudou.com/f?id=<id>`` (with ``&hd<quality>``
        appended when *quality* is truthy) and returns the text captured by
        the ``>(.+?)</f>`` pattern in the response.

        :param id: segment identifier; converted with ``str()`` for the URL.
        :param quality: optional quality key (a string) to request.
        """
        info_url = "http://v2.tudou.com/f?id=" + str(id)
        if quality:
            # NOTE(review): the key is appended right after '&hd' with no
            # '=' in between; presumably that is what the endpoint expects.
            # Confirm before changing.
            info_url += '&hd' + quality
        webpage = self._download_webpage(info_url, id, "Opening the info webpage")
        return self._html_search_regex('>(.+?)</f>', webpage, 'video url')

    def _real_extract(self, url):
        """Extract the video described by *url*.

        Returns either a ``url``-type result delegating to the Youku
        extractor (when the page contains a ``vcode``), or a ``multi_video``
        result whose entries are the segments of the highest numeric quality
        listed in the page's ``segs`` JSON.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Pages carrying a vcode are delegated to the Youku extractor.
        m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
        if m and m.group(1):
            return {
                '_type': 'url',
                'url': 'youku:' + m.group(1),
                'ie_key': 'Youku'
            }

        title = self._search_regex(
            r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
        # The thumbnail lookup is non-fatal, so extraction continues without it.
        thumbnail_url = self._search_regex(
            r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)

        # Sent as the Referer for every segment download.
        player_url = self._search_regex(
            r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
            webpage, 'player URL', default=self._PLAYER_URL)

        segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
        segments = json.loads(segs_json)
        # It looks like the keys are the arguments that have to be passed as
        # the hd field in the request url, so we pick the highest one.
        # Non-numeric qualities are filtered out (see issue #3643).
        numeric_qualities = sorted(
            (key for key in segments if key.isdigit()), key=int)
        quality = numeric_qualities[-1]
        parts = segments[quality]

        if len(parts) > 1:
            self.to_screen('%s: found %s parts' % (video_id, len(parts)))

        entries = []
        for part in parts:
            part_id = part['k']
            final_url = self._url_for_id(part_id, quality)
            # The extension is whatever follows the last dot before the query.
            ext = final_url.split('?')[0].split('.')[-1]
            entries.append({
                'id': '%s' % part_id,
                'url': final_url,
                'ext': ext,
                'title': title,
                'thumbnail': thumbnail_url,
                'http_headers': {
                    'Referer': player_url,
                },
            })

        return {
            '_type': 'multi_video',
            'entries': entries,
            'id': video_id,
            'title': title,
        }