]>
Commit | Line | Data |
---|---|---|
150f2082 JMF |
1 | # encoding: utf-8 |
2 | import re | |
3 | import xml.etree.ElementTree | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_urllib_parse, | |
8 | determine_ext, | |
9 | ) | |
10 | ||
11 | ||
class DaumIE(InfoExtractor):
    """Extractor for clips on tvpot.daum.net identified by a ``clipid``.

    The numeric clip id is taken from the URL; the per-format media URLs
    are resolved through the videofarm.daum.net XML endpoints.
    """
    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the clip referenced by *url*.

        Steps:
          1. download the canonical ``/v/<clipid>`` page and read the full
             video id from its ``<link rel="video_src">`` tag;
          2. download the clip info XML (title, contents, duration,
             registration date) and the list of available profiles;
          3. for every profile, ask the MovieLocation endpoint for the
             media URL and record it as one format.

        Raises ExtractorError when the profile list yields no formats,
        instead of failing with a bare IndexError further down.
        """
        # NOTE(review): assumes ..utils exports ExtractorError (the module
        # already provides compat_urllib_parse and determine_ext) - confirm.
        # Imported here so this block does not depend on the file header.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        # _VALID_URL names its only group 'id'; use the name, not the index.
        video_id = mobj.group('id')
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(
            r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        # Distinct names for the parsed documents: the original rebound
        # 'info' from the XML tree to the result dict built out of it.
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        for format_el in urls_doc.findall('result/output_list/output_list'):
            profile = format_el.attrib['profile']
            format_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            # One extra request per profile; note=False keeps it quiet.
            url_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note=False)
            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })

        if not formats:
            # formats[-1] below would otherwise raise an opaque IndexError.
            raise ExtractorError(u'%s: no video formats found' % video_id)

        info = {
            'id': video_id,
            'title': info_doc.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': info_doc.find('CONTENTS').text,
            'duration': int(info_doc.find('DURATION').text),
            # Only the first 8 characters of REGDTTM are used as the date.
            'upload_date': info_doc.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        # Copies the last format's url/ext/format_id to the top level.
        info.update(formats[-1])
        return info