]>
Commit | Line | Data |
---|---|---|
1 | # encoding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | compat_urllib_parse, | |
9 | ExtractorError, | |
10 | ) | |
11 | ||
12 | ||
class NaverIE(InfoExtractor):
    """Information extractor for video pages on tvcast.naver.com."""

    _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        'url': 'http://tvcast.naver.com/v/81652',
        'info_dict': {
            'id': '81652',
            'ext': 'mp4',
            'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            'upload_date': '20130903',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded and searched for the ``RMCVideoPlayer``
        constructor call, whose first two string arguments are used as the
        ``vid`` and ``inKey`` parameters of two XML requests: one for the
        video metadata and one for the list of encoding options.

        Raises ExtractorError when the constructor call is not found in
        the page.
        """
        mobj = re.match(self._VALID_URL, url)
        # Use the named group declared in _VALID_URL instead of a position.
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The dots are escaped so that only the literal identifier
        # "nhn.rmcnmv.RMCVideoPlayer" matches, not arbitrary characters.
        m_id = re.search(
            r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if m_id is None:
            raise ExtractorError('couldn\'t extract vid and key')
        vid = m_id.group(1)
        key = m_id.group(2)

        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key})
        query_urls = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info = self._download_xml(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, 'Downloading video info')
        urls = self._download_xml(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, 'Downloading video formats info')

        formats = []
        for format_el in urls.findall('EncodingOptions/EncodingOption'):
            # Each format URL is the option's Domain followed by its uri.
            domain = format_el.find('Domain').text
            f = {
                'url': domain + format_el.find('uri').text,
                'ext': 'mp4',
                'width': int(format_el.find('width').text),
                'height': int(format_el.find('height').text),
            }
            if domain.startswith('rtmp'):
                # Options served from an rtmp domain are reported as flv.
                f.update({
                    'ext': 'flv',
                    'rtmp_protocol': '1',  # rtmpt
                })
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            # Dots are stripped from WriteDate; presumably it arrives as
            # YYYY.MM.DD (the test expects '20130903') -- TODO confirm.
            'upload_date': info.find('WriteDate').text.replace('.', ''),
            'view_count': int(info.find('PlayCount').text),
        }