]>
Commit | Line | Data |
---|---|---|
80cbb6dd PH |
1 | import json |
2 | import re | |
3 | import socket | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_http_client, | |
80cbb6dd PH |
8 | compat_urllib_error, |
9 | compat_urllib_request, | |
19e1d359 | 10 | unified_strdate, |
80cbb6dd PH |
11 | ) |
12 | ||
13 | ||
class MixcloudIE(InfoExtractor):
    """Extractor for cloudcast pages on mixcloud.com."""

    # Group 1 captures the uploader's username, group 2 the cloudcast name.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    _TEST = {
        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
        u'file': u'dholbach-cryptkeeper.mp3',
        u'info_dict': {
            u'title': u'Cryptkeeper',
            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            u'uploader': u'Daniel Holbach',
            u'uploader_id': u'dholbach',
            u'upload_date': u'20111115',
        },
    }

    def check_urls(self, url_list):
        """Return the first URL from url_list that can be opened, or None.

        url_list may be any iterable, including a lazy generator: URLs are
        probed in order and probing stops at the first one that opens.
        """
        for url in url_list:
            try:
                response = compat_urllib_request.urlopen(url)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
                # This candidate is not reachable; try the next one.
                continue
            # Only reachability matters here, so release the connection
            # immediately instead of leaking the open response.
            response.close()
            return url

        return None

    def _real_extract(self, url):
        """Build the info dictionary for the cloudcast at url.

        Metadata is read from the JSON document served by api.mixcloud.com,
        while the audio URL is derived from the preview URL embedded in the
        web page. The 'url' entry is None when none of the candidate stream
        URLs can be opened.
        """
        mobj = re.match(self._VALID_URL, url)

        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = '-'.join((uploader, cloudcast_name))
        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
        webpage = self._download_webpage(url, track_id)
        json_data = self._download_webpage(
            api_url, track_id, u'Downloading cloudcast info')
        info = json.loads(json_data)

        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
        # Insert the server number directly rather than going through a
        # '%d' template: a literal '%' in the URL (e.g. a percent-escape)
        # would otherwise be misread as a format specifier by the '%' operator.
        # The generator keeps probing lazy, so check_urls stops at the first
        # candidate that opens.
        candidate_urls = (
            re.sub(r'(stream\d*)', 'stream%d' % server_number, song_url)
            for server_number in range(30))
        final_song_url = self.check_urls(candidate_urls)

        return {
            'id': track_id,
            'title': info['name'],
            'url': final_song_url,
            'ext': 'mp3',
            'description': info['description'],
            'thumbnail': info['pictures'].get('extra_large'),
            'uploader': info['user']['name'],
            'uploader_id': info['user']['username'],
            'upload_date': unified_strdate(info['created_time']),
            'view_count': info['play_count'],
        }