]>
Commit | Line | Data |
---|---|---|
24a267b5 JMF |
1 | # coding: utf-8 |
2 | ||
8bdfddf6 PH |
3 | from __future__ import unicode_literals |
4 | ||
9caa687d | 5 | from .common import InfoExtractor |
b264c213 | 6 | from ..compat import compat_str |
9caa687d YK |
7 | |
8 | ||
class TudouIE(InfoExtractor):
    """Information extractor for tudou.com video pages.

    Matches ``listplay``, ``programs`` (optionally ``programs/view``) and
    ``albumplay`` URLs.  When the page carries a Youku ``vcode`` the
    extraction is handed over to the Youku extractor; otherwise every part
    of the highest numeric quality listed in the page's ``segs`` data is
    resolved and the parts are returned together as a ``multi_video``.
    """

    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
        'md5': '140a49ed444bd22f93330985d8475fcb',
        'info_dict': {
            'id': '159448201',
            'ext': 'f4v',
            'title': '卡马乔国足开大脚长传冲吊集锦',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
        'info_dict': {
            'id': '117049447',
            'ext': 'f4v',
            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html',
        'only_matching': True,
    }]

    # Used as the Referer header when the page does not name a player SWF.
    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'

    def _url_for_id(self, video_id, quality=None):
        """Return the text of the info XML document for one video part.

        :param video_id: identifier of the part; converted with compat_str.
        :param quality: optional quality key; when truthy it is appended to
            the request as ``&hd<quality>``.
        :returns: the ``text`` of the root element of the downloaded XML,
            which the caller uses as the media URL.
        """
        info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
        if quality:
            # The quality key is concatenated directly after 'hd'.
            info_url += '&hd' + quality
        xml_data = self._download_xml(
            info_url, video_id, 'Opening the info XML page')
        return xml_data.text

    def _real_extract(self, url):
        """Extract the video described by *url*.

        :returns: a ``url_result`` pointing at the Youku extractor when the
            page contains a non-empty ``vcode``; otherwise a ``multi_video``
            dict whose entries are the parts of the best numeric quality.
        :raises IndexError: if the ``segs`` data has no all-digit quality key.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # A Youku vcode on the page means the video is delegated to Youku.
        youku_vcode = self._search_regex(
            r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None)
        if youku_vcode:
            return self.url_result('youku:' + youku_vcode, ie='Youku')

        title = self._search_regex(
            r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title')
        thumbnail_url = self._search_regex(
            r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False)

        # Fall back to the class-level player URL if the page has none.
        player_url = self._search_regex(
            r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]',
            webpage, 'player URL', default=self._PLAYER_URL)

        segments = self._parse_json(self._search_regex(
            r'segs: \'([^\']+)\'', webpage, 'segments'), video_id)
        # It looks like the keys are the arguments that have to be passed as
        # the hd field in the request url, we pick the highest one.
        # Also, filter non-number qualities (see issue #3643).
        numeric_qualities = [k for k in segments.keys() if k.isdigit()]
        quality = sorted(numeric_qualities, key=int)[-1]

        parts = segments[quality]
        len_parts = len(parts)
        if len_parts > 1:
            self.to_screen('%s: found %s parts' % (video_id, len_parts))

        result = []
        for part in parts:
            part_id = part['k']
            final_url = self._url_for_id(part_id, quality)
            # Extension is whatever follows the last dot before any query.
            ext = (final_url.split('?')[0]).split('.')[-1]
            result.append({
                'id': '%s' % part_id,
                'url': final_url,
                'ext': ext,
                'title': title,
                'thumbnail': thumbnail_url,
                'http_headers': {
                    'Referer': player_url,
                },
            })

        return {
            '_type': 'multi_video',
            'entries': result,
            'id': video_id,
            'title': title,
        }