]>
Commit | Line | Data |
---|---|---|
28746fbd PH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
520e7533 | 4 | import re |
28746fbd PH |
5 | |
6 | from .common import InfoExtractor | |
6612a349 | 7 | from ..compat import compat_str |
28746fbd | 8 | from ..utils import ( |
28746fbd | 9 | int_or_none, |
520e7533 | 10 | unescapeHTML, |
24e21613 | 11 | ExtractorError, |
76ab842d | 12 | xpath_text, |
28746fbd PH |
13 | ) |
14 | ||
15 | ||
class BiliBiliIE(InfoExtractor):
    """Information extractor for bilibili ``/video/av<id>`` pages.

    A URL may carry an optional ``index_<n>.html`` suffix selecting one page
    of the video; page 1 is used when it is absent.  The extractor queries the
    JSON "view" API for metadata and the XML "v_cdn_play" interface for the
    media URLs.  When the play interface lists several ``durl`` segments the
    result is returned as a ``multi_video`` playlist.
    """

    # The dot before "html" is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+)\.html)?'

    # Application key sent with every API request (shared by both endpoints).
    _APP_KEY = '8e9fc618fbd41e28'

    _TESTS = [{
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'md5': '2c301e4dab317596e837c3e7633e7d86',
        'info_dict': {
            # For a single-segment video the reported id is the cid.
            'id': '1554319',
            'ext': 'flv',
            'title': '【金坷垃】金泡沫',
            # Seconds; the API reports 308313, which is scaled down by 1000.
            'duration': 308,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'timestamp': 1397983878,
            'uploader': '菊子桑',
        },
    }, {
        'url': 'http://www.bilibili.com/video/av1041170/',
        'info_dict': {
            'id': '1041170',
            'title': '【BD1080P】刀语【诸神&异域】',
            'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
            'uploader': '枫叶逝去',
            'timestamp': 1396501299,
        },
        'playlist_count': 9,
    }]

    def _real_extract(self, url):
        """Extract metadata and media formats for the video at *url*.

        Returns an info dict for a single video when the play interface lists
        exactly one ``durl`` segment; otherwise returns a ``multi_video`` dict
        whose ``entries`` hold one item per segment (possibly none).

        Raises ExtractorError (``expected=True``) when either API reports an
        error.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        page_num = mobj.group('page_num') or '1'

        view_data = self._download_json(
            'http://api.bilibili.com/view?type=json&appkey=%s&id=%s&page=%s'
            % (self._APP_KEY, video_id, page_num),
            video_id)
        if 'error' in view_data:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, view_data['error']),
                expected=True)

        cid = view_data['cid']
        title = unescapeHTML(view_data['title'])

        doc = self._download_xml(
            'http://interface.bilibili.com/v_cdn_play?appkey=%s&cid=%s'
            % (self._APP_KEY, cid),
            cid,
            # The page total is only used for this progress note, so a
            # missing key must not abort the extraction.
            'Downloading page %s/%s' % (page_num, view_data.get('pages', '?'))
        )

        if xpath_text(doc, './result') == 'error':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')),
                expected=True)

        entries = []

        for durl in doc.findall('./durl'):
            # The size is looked up under either element name.
            size = xpath_text(durl, ['./filesize', './size'])
            formats = [{
                'url': durl.find('./url').text,
                'filesize': int_or_none(size),
                'ext': 'flv',
            }]
            backup_urls = durl.find('./backup_url')
            if backup_urls is not None:
                for backup_url in backup_urls.findall('./url'):
                    formats.append({'url': backup_url.text})
            # Backups go first and the primary URL last -- presumably because
            # the last format in the list is the preferred one.
            formats.reverse()

            entries.append({
                'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
                'title': title,
                # Scaled by 1000 (value appears to be in milliseconds).
                'duration': int_or_none(xpath_text(durl, './length'), 1000),
                'formats': formats,
            })

        info = {
            'id': compat_str(cid),
            'title': title,
            'description': view_data.get('description'),
            'thumbnail': view_data.get('pic'),
            'uploader': view_data.get('author'),
            'timestamp': int_or_none(view_data.get('created')),
            'view_count': int_or_none(view_data.get('play')),
            # Scaled by 1000 like the per-segment './length' above, so the
            # reported duration is in seconds rather than milliseconds.
            'duration': int_or_none(xpath_text(doc, './timelength'), 1000),
        }

        if len(entries) == 1:
            # Single segment: merge the shared metadata into that entry
            # (this replaces the '<cid>_part<n>' id with the plain cid).
            entries[0].update(info)
            return entries[0]

        info.update({
            '_type': 'multi_video',
            'id': video_id,
            'entries': entries,
        })
        return info