]>
Commit | Line | Data |
---|---|---|
6b95b065 JMF |
1 | # encoding: utf-8 |
2 | import re | |
3 | import xml.etree.ElementTree | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_urllib_parse, | |
8 | ExtractorError, | |
9 | ) | |
10 | ||
11 | ||
class NaverIE(InfoExtractor):
    """Extractor for video pages matching ``tvcast.naver.com/v/<id>``."""

    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
        u'file': u'81652.mp4',
        u'info_dict': {
            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            u'upload_date': u'20130903',
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at *url*.

        The video page is downloaded to obtain the player's ``vid`` and
        ``key`` arguments, which are then sent to two XML endpoints: one
        returning the video metadata and one listing the encoding options.

        Returns a dict with ``id``, ``title``, ``formats``, ``description``,
        ``thumbnail``, ``upload_date`` and ``view_count``, merged with the
        fields of the last entry in ``formats``.

        Raises ExtractorError when the player arguments cannot be found in
        the page, or when no non-RTMP encoding option is listed.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        m_id = re.search(
            r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if m_id is None:
            raise ExtractorError(u'couldn\'t extract vid and key')
        vid, key = m_id.group(1), m_id.group(2)

        info_query = compat_urllib_parse.urlencode({
            'vid': vid,
            'inKey': key,
        })
        formats_query = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + info_query,
            video_id, u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + formats_query,
            video_id, u'Downloading video formats info')
        # fromstring is given UTF-8 encoded bytes rather than the text
        # returned by _download_webpage.
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls_doc.findall('EncodingOptions/EncodingOption'):
            domain = format_el.find('Domain').text
            # Entries served over RTMP are skipped; only the remaining
            # ones are offered as formats.
            if domain.startswith('rtmp'):
                continue
            formats.append({
                'url': domain + format_el.find('uri').text,
                'ext': 'mp4',
                'width': int(format_el.find('width').text),
                'height': int(format_el.find('height').text),
            })

        # Without this guard, formats[-1] below fails with a bare
        # IndexError when every listed option was skipped.
        if not formats:
            raise ExtractorError(u'couldn\'t find any non-RTMP video formats')

        result = {
            'id': video_id,
            'title': info_doc.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            # The dots in WriteDate are stripped; the test case above
            # expects the form 20130903.
            'upload_date': info_doc.find('WriteDate').text.replace('.', ''),
            'view_count': int(info_doc.find('PlayCount').text),
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])
        return result