]>
Commit | Line | Data |
---|---|---|
e1f6e61e | 1 | from .common import InfoExtractor |
1b77ee62 | 2 | from ..utils import js_to_json |
84c92dc0 | 3 | |
e1f6e61e JMF |
4 | |
class C56IE(InfoExtractor):
    """Extractor for video pages on 56.com.

    A page is handled in one of two ways:

    * If the page embeds a ``sohuVideoInfo`` JavaScript object, extraction is
      delegated to the ``Sohu`` extractor using the URL found in that object.
    * Otherwise the video description is fetched as JSON from
      ``vxml.56.com`` and turned into an info dict with one format per entry
      of the ``rfiles`` list.
    """

    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
    IE_NAME = '56.com'
    _TESTS = [{
        'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        'md5': 'e59995ac63d0457783ea05f93f12a866',
        'info_dict': {
            'id': '93440716',
            'ext': 'flv',
            'title': '网事知多少 第32期:车怒',
            'duration': 283.813,
        },
    }, {
        'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
        'md5': '',
        'info_dict': {
            'id': '82247482',
            'title': '爱的诅咒之杜鹃花开',
        },
        'playlist_count': 7,
        'add_ie': ['Sohu'],
    }]

    @staticmethod
    def _as_int(value):
        """Return ``int(value)``, or ``None`` if *value* is missing or not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Extract video information for a 56.com URL.

        Args:
            url: A URL matching ``_VALID_URL``.

        Returns:
            Either the result of ``self.url_result`` pointing at the Sohu
            extractor, or an info dict with ``id``, ``title``, ``duration``,
            ``formats`` and ``thumbnail``.

        Raises:
            KeyError: If the JSON response lacks the mandatory ``info``,
                ``vid`` or ``Subject`` entries.
        """
        mobj = self._match_valid_url(url)
        text_id = mobj.group('textid')

        webpage = self._download_webpage(url, text_id)
        sohu_video_info_str = self._search_regex(
            r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
        if sohu_video_info_str:
            # The page carries a Sohu descriptor: hand the embedded URL over
            # to the Sohu extractor instead of querying the 56.com JSON API.
            sohu_video_info = self._parse_json(
                sohu_video_info_str, text_id, transform_source=js_to_json)
            return self.url_result(sohu_video_info['url'], 'Sohu')

        page = self._download_json(
            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')

        info = page['info']

        # Only the URL is essential for a format. 'type' and 'filesize' are
        # optional metadata, so a missing or malformed value must not abort
        # the whole extraction.
        formats = []
        for rfile in info.get('rfiles') or []:
            video_url = rfile.get('url')
            if not video_url:
                continue
            formats.append({
                'format_id': rfile.get('type'),
                'filesize': self._as_int(rfile.get('filesize')),
                'url': video_url,
            })

        # The raw duration is divided by 1000 (presumably milliseconds ->
        # seconds; the first test expects 283.813). It is optional, so fall
        # back to None when it is absent or not numeric.
        raw_duration = self._as_int(info.get('duration'))
        duration = raw_duration / 1000.0 if raw_duration is not None else None

        return {
            'id': info['vid'],
            'title': info['Subject'],
            'duration': duration,
            'formats': formats,
            'thumbnail': info.get('bimg') or info.get('img'),
        }