]>
Commit | Line | Data |
---|---|---|
d0390a0c PH |
1 | from __future__ import unicode_literals |
2 | ||
80cbb6dd | 3 | import re |
80cbb6dd PH |
4 | |
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
abb82f1d | 7 | compat_urllib_parse, |
baa7b197 | 8 | ExtractorError, |
57c7411f PH |
9 | int_or_none, |
10 | parse_iso8601, | |
80cbb6dd PH |
11 | ) |
12 | ||
13 | ||
class MixcloudIE(InfoExtractor):
    """Extractor for cloudcast pages on mixcloud.com.

    The audio URL is not taken from the page directly: it is derived from
    the preview URL found in the page markup and then probed against a
    range of numbered ``stream<N>`` hosts until one answers.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
    IE_NAME = 'mixcloud'

    _TEST = {
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach-cryptkeeper',
            'ext': 'mp3',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'upload_date': '20111115',
            'timestamp': 1321359578,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'like_count': int,
        },
    }

    def check_urls(self, url_list):
        """Return the first URL of url_list that can be requested.

        Candidates are tried in order. A candidate whose request raises
        ExtractorError is skipped. Returns None when no candidate works
        (or when url_list is empty).
        """
        for url in url_list:
            try:
                # We only want to know whether the request succeeds;
                # don't download the whole file.
                self._request_webpage(url, None, False)
            except ExtractorError:
                continue
            return url
        return None

    def _get_url(self, template_url):
        """Return the first working URL generated from template_url.

        template_url is a %-format string with one ``%d`` placeholder,
        which is filled with 0 through 29. Any literal percent sign in it
        must already be doubled. Returns None if no generated URL works.
        """
        return self.check_urls(template_url % i for i in range(30))

    def _real_extract(self, url):
        """Build the info dict for the cloudcast page at url.

        Raises ExtractorError when no playable track URL can be found
        (neither the mp3 nor the m4a variant answers on any probed host).
        """
        mobj = re.match(self._VALID_URL, url)
        uploader_slug = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = compat_urllib_parse.unquote(
            '-'.join((uploader_slug, cloudcast_name)))

        webpage = self._download_webpage(url, track_id)

        preview_url = self._search_regex(
            r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
        song_url = preview_url.replace('/previews/', '/c/originals/')
        # The URL is about to be used as a %-format template, so literal
        # percent signs (e.g. from percent-encoded characters) have to be
        # doubled, otherwise ``template_url % i`` fails on them.
        template_url = re.sub(
            r'(stream\d*)', 'stream%d', song_url.replace('%', '%%'))
        final_song_url = self._get_url(template_url)
        if final_song_url is None:
            self.to_screen('Trying with m4a extension')
            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
            final_song_url = self._get_url(template_url)
        if final_song_url is None:
            raise ExtractorError('Unable to extract track url')

        # Common regex prefix: the play button container followed by any
        # number of attributes preceding the one we are interested in.
        PREFIX = (
            r'<div class="cloudcast-play-button-container"'
            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
        title = self._html_search_regex(
            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
        thumbnail = self._proto_relative_url(self._html_search_regex(
            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
            fatal=False))
        uploader = self._html_search_regex(
            PREFIX + r'm-owner-name="([^"]+)"',
            webpage, 'uploader', fatal=False)
        uploader_id = self._search_regex(
            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
        description = self._og_search_description(webpage)
        like_count = int_or_none(self._search_regex(
            r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
            webpage, 'like count', fatal=False))
        view_count = int_or_none(self._search_regex(
            r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
            webpage, 'play count', fatal=False))
        # The upload date is optional metadata like the fields above, so a
        # missing <time> tag must not abort an otherwise good extraction.
        date_created = self._search_regex(
            r'<time itemprop="dateCreated" datetime="([^"]+)">',
            webpage, 'upload date', fatal=False)
        timestamp = parse_iso8601(date_created) if date_created else None

        return {
            'id': track_id,
            'title': title,
            'url': final_song_url,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': like_count,
        }