]>
Commit | Line | Data |
---|---|---|
7105440c YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
85e80f71 PH |
4 | import re |
5 | ||
7105440c YCH |
6 | from .common import InfoExtractor |
7 | from ..compat import compat_urlparse | |
85e80f71 PH |
8 | from ..utils import ( |
9 | float_or_none, | |
10 | month_by_abbreviation, | |
bd05aa4e | 11 | ExtractorError, |
6d14d08e | 12 | get_element_by_attribute, |
85e80f71 | 13 | ) |
7105440c YCH |
14 | |
15 | ||
class YamIE(InfoExtractor):
    """Extractor for media pages under http://mymedia.yam.com/m/<id>.

    A page either embeds a YouTube player, in which case extraction is
    delegated to the 'Youtube' extractor, or it refers to media whose URL
    is obtained from the site's ``/api/a/?pID=<id>`` endpoint.
    """
    IE_DESC = '蕃薯藤yam天空部落'
    # Dots are escaped so that only the literal host name matches.
    _VALID_URL = r'http://mymedia\.yam\.com/m/(?P<id>\d+)'

    _TESTS = [{
        # An audio hosted on Yam
        'url': 'http://mymedia.yam.com/m/2283921',
        'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
        'info_dict': {
            'id': '2283921',
            'ext': 'mp3',
            'title': '發現 - 趙薇 京華煙雲主題曲',
            'description': '發現 - 趙薇 京華煙雲主題曲',
            'uploader_id': 'princekt',
            'upload_date': '20080807',
            'duration': 313.0,
        }
    }, {
        # An external video hosted on YouTube
        'url': 'http://mymedia.yam.com/m/3599430',
        'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
        'info_dict': {
            'id': 'CNpEoQlrIgA',
            'ext': 'mp4',
            'upload_date': '20150306',
            'uploader': '新莊社大瑜伽社',
            'description': 'md5:11e2e405311633ace874f2e6226c8b17',
            'uploader_id': '2323agoy',
            'title': '20090412陽明山二子坪-1',
        },
        'skip': 'Video does not exist',
    }, {
        'url': 'http://mymedia.yam.com/m/3598173',
        'info_dict': {
            'id': '3598173',
            'ext': 'mp4',
        },
        'skip': 'cause Yam system error',
    }, {
        'url': 'http://mymedia.yam.com/m/3599437',
        'info_dict': {
            'id': '3599437',
            'ext': 'mp4',
        },
        'skip': 'invalid YouTube URL',
    }, {
        'url': 'http://mymedia.yam.com/m/2373534',
        'md5': '7ff74b91b7a817269d83796f8c5890b1',
        'info_dict': {
            'id': '2373534',
            'ext': 'mp3',
            'title': '林俊傑&蔡卓妍-小酒窩',
            'description': 'md5:904003395a0fcce6cfb25028ff468420',
            'upload_date': '20080928',
            'uploader_id': 'onliner2',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict (or a YouTube url_result) for a Yam page.

        Raises ExtractorError when the page shows a system message or when
        the API reply carries no media URL.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Check for errors
        system_msg = self._html_search_regex(
            r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
            default=None)
        if system_msg:
            raise ExtractorError(system_msg, expected=True)

        # Is it hosted externally on YouTube? Accept both schemes and match
        # the host name literally.
        youtube_url = self._html_search_regex(
            r'<embed src="(https?://www\.youtube\.com/[^"]+)"',
            page, 'YouTube url', default=None)
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        title = self._html_search_regex(
            r'<h1[^>]+class="heading"[^>]*>\s*(.+)\s*</h1>', page, 'title')

        api_page = self._download_webpage(
            'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
            note='Downloading API page')
        api_result_obj = compat_urlparse.parse_qs(api_page)

        # The media URL is mandatory; report its absence as an extractor
        # error instead of letting a bare KeyError escape.
        media_urls = api_result_obj.get('mp3file')
        if not media_urls:
            raise ExtractorError('Unable to find media URL in API response')

        # NOTE(review): get_element_by_attribute presumably yields None when
        # no element matches - confirm. Falling back to '' keeps this
        # non-fatal lookup from raising TypeError inside the regex search.
        info_table = get_element_by_attribute('class', 'info', page)
        uploader_id = self._html_search_regex(
            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z0-9]+)"',
            info_table or '', 'uploader id', fatal=False)

        upload_date = None
        mobj = re.search(
            r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+'
            r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
        if mobj:
            month = month_by_abbreviation(mobj.group('mon'))
            # Only format the date when the abbreviation mapped to a month
            # number; '%02d' would raise TypeError on a non-number.
            if month:
                upload_date = '%s%02d%02d' % (
                    mobj.group('year'), month, int(mobj.group('day')))

        # Duration is optional metadata; a missing 'totaltime' field must
        # not abort the extraction. The value is divided by 1000 via scale.
        duration = float_or_none(
            api_result_obj.get('totaltime', [None])[0], scale=1000)

        return {
            'id': video_id,
            'url': media_urls[0],
            'title': title,
            'description': self._html_search_meta('description', page),
            'duration': duration,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
        }