]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
80cbb6dd | 3 | import re |
80cbb6dd PH |
4 | |
5 | from .common import InfoExtractor | |
1cc79574 | 6 | from ..compat import ( |
abb82f1d | 7 | compat_urllib_parse, |
1cc79574 PH |
8 | ) |
9 | from ..utils import ( | |
baa7b197 | 10 | ExtractorError, |
dbc1366b | 11 | HEADRequest, |
b80505a4 | 12 | str_to_int, |
80cbb6dd PH |
13 | ) |
14 | ||
15 | ||
class MixcloudIE(InfoExtractor):
    """Extractor for single cloudcast pages on mixcloud.com.

    URLs are expected to look like ``mixcloud.com/<uploader>/<cloudcast>``;
    the two path components are captured by ``_VALID_URL``.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach-cryptkeeper',
            'ext': 'mp3',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            # Raw string: '\.' is not a valid escape sequence in a plain
            # literal and triggers a warning on newer Python versions.
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*/images/',
            'view_count': int,
            'like_count': int,
        },
    }]

    def _check_url(self, url, track_id, ext):
        """Return True if a HEAD request for *url* succeeds, else False.

        *track_id* and *ext* are only used for the progress note passed to
        ``_request_webpage``. An ``ExtractorError`` raised by the request is
        treated as "URL not available" and reported as False.
        """
        try:
            # We only want to know if the request succeeds;
            # don't download the whole file.
            self._request_webpage(
                HEADRequest(url), track_id,
                'Trying %s URL' % ext)
            return True
        except ExtractorError:
            return False

    def _real_extract(self, url):
        """Build the info dict for the cloudcast at *url*.

        The track id is the unquoted ``<uploader>-<cloudcast>`` pair taken
        from the URL. The audio URL is derived from the preview URL found in
        the page: first the mp3 "originals" location is probed, then the m4a
        variant.

        Raises ExtractorError if neither candidate audio URL answers a HEAD
        request.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))

        webpage = self._download_webpage(url, track_id)

        preview_url = self._search_regex(
            r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
        # The full-length file is guessed by rewriting the preview path.
        song_url = preview_url.replace('/previews/', '/c/originals/')
        if not self._check_url(song_url, track_id, 'mp3'):
            # Fall back to the m4a rendition when the mp3 is not reachable.
            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
            if not self._check_url(song_url, track_id, 'm4a'):
                raise ExtractorError('Unable to extract track url')

        # Common prefix: the element carrying "m-play-on-spacebar" followed
        # by any number of other attributes before the one being captured.
        PREFIX = (
            r'm-play-on-spacebar[^>]+'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
        thumbnail = self._proto_relative_url(self._html_search_regex(
            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
            fatal=False))
        # From here on `uploader` holds the display name from the page,
        # replacing the URL path component read above.
        uploader = self._html_search_regex(
            PREFIX + r'm-owner-name="([^"]+)"',
            webpage, 'uploader', fatal=False)
        uploader_id = self._search_regex(
            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
        description = self._og_search_description(webpage)
        like_count = str_to_int(self._search_regex(
            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
            webpage, 'like count', fatal=False))
        view_count = str_to_int(self._search_regex(
            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
             r'/listeners/?">([0-9,.]+)</a>'],
            webpage, 'play count', fatal=False))

        return {
            'id': track_id,
            'title': title,
            'url': song_url,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
        }