]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
80cbb6dd | 3 | import re |
80cbb6dd PH |
4 | |
5 | from .common import InfoExtractor | |
c2daf8df | 6 | from ..compat import compat_urllib_parse_unquote |
1cc79574 | 7 | from ..utils import ( |
baa7b197 | 8 | ExtractorError, |
dbc1366b | 9 | HEADRequest, |
b80505a4 | 10 | str_to_int, |
80cbb6dd PH |
11 | ) |
12 | ||
13 | ||
class MixcloudIE(InfoExtractor):
    """Extractor for a single cloudcast page on mixcloud.com."""

    # Group 1 is the uploader path segment, group 2 the cloudcast path segment.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach-cryptkeeper',
            'ext': 'mp3',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            # Raw string: '\.' is not a valid escape sequence in a normal
            # string literal and triggers a warning on modern Python.
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*/images/',
            'view_count': int,
            'like_count': int,
        },
    }]

    def _check_url(self, url, track_id, ext):
        """Return True if a HEAD request for ``url`` succeeds, else False.

        ``track_id`` and ``ext`` are only used to label the request
        ('Trying <ext> URL'). An ExtractorError raised by the request is
        swallowed and reported as False.
        """
        try:
            # We only want to know if the request succeeds;
            # don't download the whole file.
            self._request_webpage(
                HEADRequest(url), track_id,
                'Trying %s URL' % ext)
            return True
        except ExtractorError:
            return False

    def _real_extract(self, url):
        """Build the info dict for the cloudcast page at ``url``.

        The stream URL is derived from the preview URL found in the page:
        first the mp3 "originals" location is tried, then the m4a one.

        Raises ExtractorError if neither candidate URL answers a HEAD
        request.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        # The id is "<uploader>-<cloudcast>" with URL percent-escapes decoded.
        track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))

        webpage = self._download_webpage(url, track_id)

        preview_url = self._search_regex(
            r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
        # Rewrite the preview location into the candidate full-track location.
        song_url = preview_url.replace('/previews/', '/c/originals/')
        if not self._check_url(song_url, track_id, 'mp3'):
            # mp3 candidate is not reachable: fall back to the m4a variant.
            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
            if not self._check_url(song_url, track_id, 'm4a'):
                raise ExtractorError('Unable to extract track url')

        # Shared regex prefix: matches from the 'm-play-on-spacebar' marker,
        # lazily skipping intervening attributes, up to the attribute that
        # each pattern below appends.
        PREFIX = (
            r'm-play-on-spacebar[^>]+'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
        # Everything below except the description is looked up with
        # fatal=False.
        thumbnail = self._proto_relative_url(self._html_search_regex(
            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
            fatal=False))
        # From here on 'uploader' holds the name scraped from the page (the
        # URL segment captured above has already been folded into track_id).
        uploader = self._html_search_regex(
            PREFIX + r'm-owner-name="([^"]+)"',
            webpage, 'uploader', fatal=False)
        uploader_id = self._search_regex(
            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
        description = self._og_search_description(webpage)
        like_count = str_to_int(self._search_regex(
            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
            webpage, 'like count', fatal=False))
        # Two alternative patterns are supplied for the play count.
        view_count = str_to_int(self._search_regex(
            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
             r'/listeners/?">([0-9,.]+)</a>'],
            webpage, 'play count', fatal=False))

        return {
            'id': track_id,
            'title': title,
            'url': song_url,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
        }