]>
Commit | Line | Data |
---|---|---|
150f2082 | 1 | # encoding: utf-8 |
23f4a93b PH |
2 | |
3 | from __future__ import unicode_literals | |
4 | ||
150f2082 | 5 | import re |
150f2082 JMF |
6 | |
7 | from .common import InfoExtractor | |
1cc79574 | 8 | from ..compat import ( |
150f2082 | 9 | compat_urllib_parse, |
150f2082 JMF |
10 | ) |
11 | ||
12 | ||
class DaumIE(InfoExtractor):
    """Extractor for video pages hosted on tvpot.daum.net (including m.tvpot)."""

    # Accepts both the short ``/v/<id>`` form and any page URL that carries a
    # ``clipid=<id>`` query parameter; the captured group is the video id.
    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)'
    IE_NAME = 'daum.net'

    _TESTS = [
        {
            'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
            'info_dict': {
                'id': '52554690',
                'ext': 'mp4',
                'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
                'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
                'upload_date': '20130831',
                'duration': 3868,
            },
        },
        {
            # Test for https://github.com/rg3/youtube-dl/issues/7949
            'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290',
            'md5': 'c92d78bcee4424451f1667f275c1dc97',
            'info_dict': {
                'id': '73147290',
                'ext': 'mp4',
                'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218',
                'description': '싸이 - 나팔바지',
                'upload_date': '20151219',
                'duration': 232,
            },
        },
        {
            'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
            'only_matching': True,
        },
        {
            'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the Daum video addressed by *url*.

        The id captured by ``_VALID_URL`` is used to fetch the ``/v/<id>``
        page; the "full id" found in that page's og:url (or, failing that,
        its ``<link rel="canonical">``) is then used to query the clip-info
        XML, the format-list XML and one location XML per format profile.

        Returns a dict with ``id``, ``title``, ``formats``, ``thumbnail``,
        ``description``, ``duration`` and ``upload_date``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(
            'http://tvpot.daum.net/v/%s' % video_id, video_id)

        # Prefer the og:url meta tag; only when it yields nothing fall back
        # to the canonical <link> element.
        page_url = self._og_search_url(webpage, default=None)
        if not page_url:
            page_url = self._search_regex(
                r'<link[^>]+rel=(["\'])canonical\1[^>]+href=(["\'])(?P<url>.+?)\2',
                webpage, 'canonical url', group='url')

        full_id = self._search_regex(
            r'tvpot\.daum\.net/v/([^/]+)', page_url, 'full id')

        vid_query = compat_urllib_parse.urlencode({'vid': full_id})
        clip_info = self._download_xml(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + vid_query,
            video_id, 'Downloading video info')
        movie_data = self._download_xml(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + vid_query,
            video_id, 'Downloading video formats info')

        # Each listed output profile needs its own request to resolve the
        # actual media URL.
        formats = []
        for output_el in movie_data.findall('result/output_list/output_list'):
            profile = output_el.attrib['profile']
            location_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            location_doc = self._download_xml(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + location_query,
                video_id,
                note='Downloading video data for %s format' % profile)
            formats.append({
                'url': location_doc.find('result/url').text,
                'format_id': profile,
            })

        return {
            'id': video_id,
            'title': clip_info.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': clip_info.find('CONTENTS').text,
            'duration': int(clip_info.find('DURATION').text),
            # REGDTTM's first eight characters are used as the upload date.
            'upload_date': clip_info.find('REGDTTM').text[:8],
        }
89 | } |