]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | ||
3 | from __future__ import unicode_literals | |
4 | ||
5 | import re | |
6 | import json | |
7 | ||
8 | from .common import InfoExtractor | |
9 | ||
10 | ||
class TudouIE(InfoExtractor):
    """Information extractor for video pages on tudou.com.

    Pages that embed a Youku ``vcode`` are handed over to the Youku
    extractor.  Every other page is resolved into a ``multi_video``
    result holding one entry per segment of the highest numeric quality
    listed in the page's ``segs`` data.
    """

    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
        'md5': '140a49ed444bd22f93330985d8475fcb',
        'info_dict': {
            'id': '159448201',
            'ext': 'f4v',
            'title': '卡马乔国足开大脚长传冲吊集锦',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
        'info_dict': {
            'id': '117049447',
            'ext': 'f4v',
            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }]

    # Used as the Referer header when the page does not name its own player.
    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'

    def _url_for_id(self, id, quality=None):
        """Return the media URL of a single segment.

        Downloads the info document for ``id`` from v2.tudou.com (with
        ``&hd<quality>`` appended when ``quality`` is truthy) and returns
        the text found between ``>`` and ``</f>`` in the response.
        """
        info_url = "http://v2.tudou.com/f?id=" + str(id)
        if quality:
            info_url += '&hd' + quality
        webpage = self._download_webpage(
            info_url, id, "Opening the info webpage")
        return self._html_search_regex('>(.+?)</f>', webpage, 'video url')

    def _real_extract(self, url):
        """Extract the video described by ``url``.

        Returns either a ``url`` result pointing at the Youku extractor
        (when the page carries a ``vcode``) or a ``multi_video`` result
        whose entries are the individual segments of the video.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # A vcode means the video is hosted on Youku; let that extractor
        # handle it.  The group is '.+?', so a match is never empty.
        vcode_match = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
        if vcode_match:
            return {
                '_type': 'url',
                'url': 'youku:' + vcode_match.group(1),
                'ie_key': 'Youku'
            }

        title = self._search_regex(
            r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
        thumbnail_url = self._search_regex(
            r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)

        player_url = self._search_regex(
            r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
            webpage, 'player URL', default=self._PLAYER_URL)

        segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
        segments = json.loads(segs_json)
        # It looks like the keys are the arguments that have to be passed as
        # the hd field in the request url, so the highest one is picked.
        # Non-number qualities are filtered out (see issue #3643).
        numeric_qualities = [k for k in segments if k.isdigit()]
        quality = sorted(numeric_qualities, key=int)[-1]
        parts = segments[quality]

        len_parts = len(parts)
        if len_parts > 1:
            self.to_screen('%s: found %s parts' % (video_id, len_parts))

        entries = []
        for part in parts:
            part_id = part['k']
            final_url = self._url_for_id(part_id, quality)
            # The extension is whatever follows the last dot of the URL
            # once its query string has been removed.
            ext = final_url.split('?')[0].split('.')[-1]
            entries.append({
                'id': '%s' % part_id,
                'url': final_url,
                'ext': ext,
                'title': title,
                'thumbnail': thumbnail_url,
                'http_headers': {
                    'Referer': player_url,
                },
            })

        return {
            '_type': 'multi_video',
            'entries': entries,
            'id': video_id,
            'title': title,
        }