]>
Commit | Line | Data |
---|---|---|
80cbb6dd PH |
1 | import json |
2 | import re | |
80cbb6dd PH |
3 | |
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
19e1d359 | 6 | unified_strdate, |
baa7b197 | 7 | ExtractorError, |
80cbb6dd PH |
8 | ) |
9 | ||
10 | ||
class MixcloudIE(InfoExtractor):
    """Information extractor for cloudcasts hosted on mixcloud.com.

    The cloudcast metadata is read from the JSON document served by
    api.mixcloud.com, while the audio URL is derived from the preview URL
    embedded in the cloudcast web page.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    _TEST = {
        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
        u'file': u'dholbach-cryptkeeper.mp3',
        u'info_dict': {
            u'title': u'Cryptkeeper',
            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            u'uploader': u'Daniel Holbach',
            u'uploader_id': u'dholbach',
            u'upload_date': u'20111115',
        },
    }

    def check_urls(self, url_list):
        """Return the first URL of url_list that can be requested.

        url_list may be any iterable of URLs; it is consumed lazily, so no
        further URL is probed once one succeeds.  Returns None when every
        request fails with an ExtractorError (or the iterable is empty).
        """
        for url in url_list:
            try:
                # We only want to know if the request succeeds,
                # don't download the whole file
                self._request_webpage(url, None, False)
            except ExtractorError:
                # This candidate is not available, try the next one
                continue
            return url

        return None

    def _get_url(self, template_url):
        """Return the first working URL generated from template_url.

        template_url must contain exactly one '%d' placeholder (any literal
        percent sign has to be doubled); it is filled with 0..29 in turn.
        Returns None if none of the candidates can be requested.
        """
        return self.check_urls(template_url % i for i in range(30))

    def _real_extract(self, url):
        """Extract the info dictionary for the cloudcast at url.

        Raises ExtractorError when no downloadable track URL can be found.
        Only 'id', 'title' and 'url' are required; the remaining fields
        are None when the API response does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)

        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = '-'.join((uploader, cloudcast_name))
        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
        webpage = self._download_webpage(url, track_id)
        json_data = self._download_webpage(api_url, track_id,
            u'Downloading cloudcast info')
        info = json.loads(json_data)

        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
        # The URL is turned into a %-format template below, so any literal
        # percent sign (e.g. from percent-encoding) must be escaped first.
        escaped_song_url = song_url.replace('%', '%%')
        # Replace only the first "streamN" so that the template carries
        # exactly the single placeholder _get_url fills in.
        template_url, substitutions = re.subn(
            r'(stream\d*)', 'stream%d', escaped_song_url, count=1)
        if not substitutions:
            # Without a placeholder the template could not be formatted
            raise ExtractorError(u'Unable to extract track url')

        final_song_url = self._get_url(template_url)
        if final_song_url is None:
            self.to_screen('Trying with m4a extension')
            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
            final_song_url = self._get_url(template_url)
        if final_song_url is None:
            raise ExtractorError(u'Unable to extract track url')

        # Everything below except the title is optional metadata: a field
        # missing from the API response must not abort the extraction.
        pictures = info.get('pictures') or {}
        user = info.get('user') or {}
        created_time = info.get('created_time')

        return {
            'id': track_id,
            'title': info['name'],
            'url': final_song_url,
            'description': info.get('description'),
            'thumbnail': pictures.get('extra_large'),
            'uploader': user.get('name'),
            'uploader_id': user.get('username'),
            'upload_date': unified_strdate(created_time) if created_time else None,
            'view_count': info.get('play_count'),
        }