]>
Commit | Line | Data |
---|---|---|
7105440c YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
85e80f71 PH |
4 | import re |
5 | ||
7105440c YCH |
6 | from .common import InfoExtractor |
7 | from ..compat import compat_urlparse | |
85e80f71 PH |
8 | from ..utils import ( |
9 | float_or_none, | |
10 | month_by_abbreviation, | |
bd05aa4e | 11 | ExtractorError, |
85e80f71 | 12 | ) |
7105440c YCH |
13 | |
14 | ||
class YamIE(InfoExtractor):
    """Extractor for media pages under ``mymedia.yam.com/m/<id>``.

    A page that embeds a YouTube player is handed off through
    ``url_result`` with the ``'Youtube'`` extractor key. Any other page is
    resolved through the site's ``/api/a/`` endpoint, whose response body
    is parsed as a URL query string.
    """

    # Dots are escaped so that only the literal host name is accepted.
    _VALID_URL = r'http://mymedia\.yam\.com/m/(?P<id>\d+)'

    _TESTS = [{
        # An audio hosted on Yam
        'url': 'http://mymedia.yam.com/m/2283921',
        'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
        'info_dict': {
            'id': '2283921',
            'ext': 'mp3',
            'title': '發現 - 趙薇 京華煙雲主題曲',
            'uploader_id': 'princekt',
            'upload_date': '20080807',
            'duration': 313.0,
        }
    }, {
        # An external video hosted on YouTube
        'url': 'http://mymedia.yam.com/m/3599430',
        'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
        'info_dict': {
            'id': 'CNpEoQlrIgA',
            'ext': 'mp4',
            'upload_date': '20150306',
            'uploader': '新莊社大瑜伽社',
            'description': 'md5:11e2e405311633ace874f2e6226c8b17',
            'uploader_id': '2323agoy',
            'title': '20090412陽明山二子坪-1',
        },
        'skip': 'Video does not exist',
    }, {
        'url': 'http://mymedia.yam.com/m/3598173',
        'info_dict': {
            'id': '3598173',
            'ext': 'mp4',
        },
        'skip': 'cause Yam system error',
    }, {
        'url': 'http://mymedia.yam.com/m/3599437',
        'info_dict': {
            'id': '3599437',
            'ext': 'mp4',
        },
        'skip': 'invalid YouTube URL',
    }]

    def _real_extract(self, url):
        """Return the info dict (or a YouTube hand-off) for a Yam media page.

        Raises:
            ExtractorError: if the page displays a system message (marked
                as an expected error), or if the API response carries no
                ``mp3file`` entry to download from.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Check for errors: the site reports failures as text following a
        # "系統訊息" heading on the page itself.
        system_msg = self._html_search_regex(
            r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
            default=None)
        if system_msg:
            raise ExtractorError(system_msg, expected=True)

        # Is it hosted externally on YouTube?
        youtube_url = self._html_search_regex(
            r'<embed src="(http://www\.youtube\.com/[^"]+)"',
            page, 'YouTube url', default=None)
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        api_page = self._download_webpage(
            'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
            note='Downloading API page')
        # parse_qs maps every key to a list of values.
        api_result_obj = compat_urlparse.parse_qs(api_page)

        # The media URL is the one field we cannot do without, so fail with
        # a readable message instead of a bare KeyError.
        media_urls = api_result_obj.get('mp3file')
        if not media_urls:
            raise ExtractorError(
                'Unable to find media URL in API response')

        uploader_id = self._html_search_regex(
            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
            page, 'uploader id', fatal=False)

        # The upload date is optional: any failure to parse it yields None.
        # \s+ tolerates extra padding between the month and the day.
        upload_date = None
        mobj = re.search(
            r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+'
            r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
        if mobj:
            month = month_by_abbreviation(mobj.group('mon'))
            # Guard against a three-letter token that is not a month name.
            if month is not None:
                upload_date = '%s%02d%02d' % (
                    mobj.group('year'), month, int(mobj.group('day')))

        # 'totaltime' is divided by 1000 (scale=1000), which matches the
        # test expectation of seconds; a missing value simply leaves the
        # duration unset instead of aborting the extraction.
        duration = float_or_none(
            api_result_obj.get('totaltime', [None])[0], scale=1000)

        return {
            'id': video_id,
            'url': media_urls[0],
            'title': self._html_search_meta('description', page),
            'duration': duration,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
        }