]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
80cbb6dd | 3 | import re |
80cbb6dd PH |
4 | |
5 | from .common import InfoExtractor | |
1cc79574 | 6 | from ..compat import ( |
abb82f1d | 7 | compat_urllib_parse, |
1cc79574 PH |
8 | ) |
9 | from ..utils import ( | |
baa7b197 | 10 | ExtractorError, |
dbc1366b | 11 | HEADRequest, |
b80505a4 | 12 | str_to_int, |
57c7411f | 13 | parse_iso8601, |
80cbb6dd PH |
14 | ) |
15 | ||
16 | ||
class MixcloudIE(InfoExtractor):
    """Information extractor for single Mixcloud cloudcast pages.

    The audio URL is not read directly from the page. It is derived from
    the preview URL found in the markup, and the resulting candidates are
    probed with HEAD requests until one of them answers.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach-cryptkeeper',
            'ext': 'mp3',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'upload_date': '20111115',
            'timestamp': 1321359578,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
            'ext': 'm4a',
            'title': 'Electric Relaxation vol. 3',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Daniel Drumz',
            'uploader_id': 'gillespeterson',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }]

    def _get_url(self, track_id, template_url):
        """Return the first candidate URL that answers a HEAD request.

        ``template_url`` is a ``%``-format string with one ``%d``
        placeholder. It is filled with 0 .. 29 in turn and each result is
        probed. A probe that raises ``ExtractorError`` is treated as a
        miss and the next number is tried.

        Returns the working URL, or ``None`` when every candidate failed.
        """
        server_count = 30
        for i in range(server_count):
            url = template_url % i
            try:
                # We only want to know whether the request succeeds;
                # a HEAD request avoids downloading the whole file.
                self._request_webpage(
                    HEADRequest(url), track_id,
                    'Checking URL %d/%d ...' % (i + 1, server_count))
            except ExtractorError:
                continue
            return url

        return None

    def _real_extract(self, url):
        """Extract the audio URL and metadata for one cloudcast page.

        Returns an info dict with the keys ``id``, ``title``, ``url``,
        ``description``, ``thumbnail``, ``uploader``, ``uploader_id``,
        ``timestamp``, ``view_count`` and ``like_count``.

        Raises ``ExtractorError`` when neither the mp3 nor the m4a
        candidate URLs can be reached.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader_slug = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = compat_urllib_parse.unquote(
            '-'.join((uploader_slug, cloudcast_name)))

        webpage = self._download_webpage(url, track_id)

        preview_url = self._search_regex(
            r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
        song_url = preview_url.replace('/previews/', '/c/originals/')
        # The URL is about to become a %-format template, so literal '%'
        # characters (e.g. from percent-encoding) must be doubled first;
        # otherwise ``template_url % i`` in _get_url() would raise.
        template_url = re.sub(
            r'(stream\d*)', 'stream%d', song_url.replace('%', '%%'))
        final_song_url = self._get_url(track_id, template_url)
        if final_song_url is None:
            # No mp3 original was reachable: retry with the m4a variant.
            self.to_screen('Trying with m4a extension')
            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
            final_song_url = self._get_url(track_id, template_url)
        if final_song_url is None:
            raise ExtractorError('Unable to extract track url')

        # Matches the opening of the play-button <span> and lazily skips
        # over any attributes that precede the one being captured.
        PREFIX = (
            r'<span class="play-button[^"]*?"'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
        thumbnail = self._proto_relative_url(self._html_search_regex(
            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
            fatal=False))
        uploader = self._html_search_regex(
            PREFIX + r'm-owner-name="([^"]+)"',
            webpage, 'uploader', fatal=False)
        uploader_id = self._search_regex(
            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
        description = self._og_search_description(webpage)
        like_count = str_to_int(self._search_regex(
            [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
             r'/favorites/?">([0-9]+)<'],
            webpage, 'like count', fatal=False))
        view_count = str_to_int(self._search_regex(
            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
             r'/listeners/?">([0-9,.]+)</a>'],
            webpage, 'play count', fatal=False))
        timestamp = parse_iso8601(self._search_regex(
            r'<time itemprop="dateCreated" datetime="([^"]+)">',
            webpage, 'upload date', default=None))

        return {
            'id': track_id,
            'title': title,
            'url': final_song_url,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': like_count,
        }