]>
Commit | Line | Data |
---|---|---|
aad0d6d5 PH |
1 | import json |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | compat_str, | |
7 | ||
8 | ExtractorError, | |
9 | unified_strdate, | |
10 | ) | |
11 | ||
12 | ||
class SoundcloudIE(InfoExtractor):
    """Information extractor for single tracks on soundcloud.com.

    The track page URL is resolved to its metadata through
    api.soundcloud.com/resolve.json.  The track id found in that metadata
    is then used to request the stream definitions from api.sndcdn.com,
    and the ``http_mp3_128_url`` entry of that answer becomes the media URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'soundcloud'

    # API key sent with every request made by this extractor.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    def report_resolve(self, video_id):
        """Report that the id for *video_id* is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Extract the metadata and media URL of a single track.

        Returns a one-element list holding the info dictionary of the track.

        Raises ExtractorError if *url* does not match ``_VALID_URL`` or if
        the resolve request answers with an ``errors`` payload.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group(2)
        full_title = '%s/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        # Rebuild the URL from the matched parts so that the scheme, the
        # "www." prefix and anything after the slug are normalised away.
        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + self._CLIENT_ID
        info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')

        info = json.loads(info_json)
        # The set extractor in this file checks the resolve answer for an
        # 'errors' list; do the same here so that such an answer produces a
        # readable error instead of a KeyError on info['id'].
        if 'errors' in info:
            messages = [compat_str(err['error_message']) for err in info['errors']]
            raise ExtractorError(u'unable to download video webpage: %s' % u'; '.join(messages))

        video_id = info['id']
        self.report_extraction(full_title)

        streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=' + self._CLIENT_ID
        stream_json = self._download_webpage(streams_url, full_title,
                                             u'Downloading stream definitions',
                                             u'unable to download stream definitions')

        streams = json.loads(stream_json)
        mediaURL = streams['http_mp3_128_url']
        upload_date = unified_strdate(info['created_at'])

        return [{
            'id': video_id,
            'url': mediaURL,
            'uploader': info['user']['username'],
            'upload_date': upload_date,
            'title': info['title'],
            'ext': u'mp3',
            'description': info['description'],
        }]
68 | ||
class SoundcloudSetIE(InfoExtractor):
    """Information extractor for sets (playlists) on soundcloud.com.

    The set URL is resolved to its metadata through
    api.soundcloud.com/resolve.json.  For every entry of the ``tracks``
    list in that metadata the stream definitions are requested from
    api.sndcdn.com, and the ``http_mp3_128_url`` entry is used as media URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
    IE_NAME = u'soundcloud:set'

    def report_resolve(self, video_id):
        """Report that the id for *video_id* is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Extract one info dictionary per track of the set.

        Returns the list of info dictionaries, or None (after reporting
        each message through the downloader) when the resolve answer
        contains an ``errors`` list.

        Raises ExtractorError if *url* does not match ``_VALID_URL``.
        """
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Both the uploader and the set slug come straight from the URL.
        uploader, set_slug = match.groups()
        set_path = '%s/sets/%s' % (uploader, set_slug)

        self.report_resolve(set_path)

        canonical_url = 'http://soundcloud.com/%s/sets/%s' % (uploader, set_slug)
        resolve_url = ''.join((
            'http://api.soundcloud.com/resolve.json?url=',
            canonical_url,
            '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28',
        ))
        playlist = json.loads(self._download_webpage(resolve_url, set_path))

        if 'errors' in playlist:
            # Report every error the API listed, then give up on the set.
            for failure in playlist['errors']:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(failure['error_message']))
            return

        self.report_extraction(set_path)

        entries = []
        for track in playlist['tracks']:
            track_id = track['id']

            streams_url = ''.join((
                'https://api.sndcdn.com/i1/tracks/',
                str(track_id),
                '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28',
            ))
            raw_streams = self._download_webpage(streams_url, track_id, u'Downloading track info JSON')

            self.report_extraction(track_id)
            media_url = json.loads(raw_streams)['http_mp3_128_url']

            entry = {
                'id': track_id,
                'url': media_url,
                'uploader': track['user']['username'],
                'upload_date': unified_strdate(track['created_at']),
                'title': track['title'],
                'ext': u'mp3',
                'description': track['description'],
            }
            entries.append(entry)
        return entries