]>
Commit | Line | Data |
---|---|---|
80cbb6dd PH |
1 | import json |
2 | import re | |
3 | import socket | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | compat_http_client, | |
8 | compat_str, | |
9 | compat_urllib_error, | |
10 | compat_urllib_request, | |
11 | ||
12 | ExtractorError, | |
13 | ) | |
14 | ||
15 | ||
class MixcloudIE(InfoExtractor):
    """Information extractor for mixcloud.com cloudcast pages.

    The extractor matches ``mixcloud.com/<uploader>/<cloudcast>`` URLs,
    downloads the JSON description of the cloudcast and returns the first
    audio URL that can actually be opened.
    """

    # New API, but it seems good http://www.mixcloud.com/developers/documentation/
    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    @staticmethod
    def _to_unicode(value):
        """Return *value* as text, decoding byte strings as UTF-8.

        Values that are already text (or ``None``) are returned unchanged,
        so the helper is safe on both Python 2 and Python 3, where text
        strings have no usable ``decode`` method.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    @staticmethod
    def _bitrate_key(bitrate):
        """Sort key that orders numeric bitrate labels by numeric value.

        JSON object keys are always strings, so a plain ``max()`` would rank
        '64' above '320'.  Labels that are not plain integers sort below
        all numeric ones and are ordered by their text.
        """
        try:
            return (1, int(bitrate), compat_str(bitrate))
        except (TypeError, ValueError):
            return (0, 0, compat_str(bitrate))

    def report_download_json(self, file_id):
        """Report JSON download.

        *file_id* is accepted for interface compatibility; it is not
        included in the message.
        """
        self.to_screen(u'Downloading json')

    def get_urls(self, jsonData, fmt, bitrate='best'):
        """Get urls from 'audio_formats' section in json.

        ``jsonData[fmt]`` is either a mapping of bitrate -> list of URLs or,
        when there is no bitrate information, the list of URLs itself.

        Returns the URL list for *bitrate*; when *bitrate* is ``None``,
        ``'best'`` or unknown, the highest available bitrate is used.  An
        empty mapping yields an empty list.
        """
        bitrate_list = jsonData[fmt]
        if not isinstance(bitrate_list, dict):
            # We have no bitrate info: the entry already is the URL list.
            return bitrate_list
        if not bitrate_list:
            return []
        if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
            bitrate = max(bitrate_list, key=self._bitrate_key)  # select highest
        return bitrate_list[bitrate]

    def check_urls(self, url_list):
        """Return the first URL in *url_list* that can be opened, else None."""
        for url in url_list:
            try:
                response = compat_urllib_request.urlopen(url)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
                continue
            # Only reachability matters here; release the connection.
            response.close()
            return url
        return None

    def _print_formats(self, formats):
        """Print one line per available format/bitrate combination.

        Entries without bitrate information are listed once with '??' in
        the bitrate column.
        """
        print('Available formats:')
        for fmt in formats:
            entries = formats[fmt]
            if isinstance(entries, dict):
                for bitrate in entries:
                    ext = entries[bitrate][0]
                    print('%s\t%s\t[%s]' % (fmt, bitrate, ext.split('.')[-1]))
            elif entries:
                # We have no bitrate info.
                ext = entries[0]
                print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))

    def _real_extract(self, url):
        """Extract the information dictionary for the cloudcast at *url*.

        Returns a one-element list with the info dictionary, or ``None``
        when the 'listformats' option is set (the formats are printed
        instead).

        Raises ExtractorError if the URL is invalid, the JSON description
        cannot be downloaded or parsed, the requested format is not
        available, or no candidate media URL can be opened.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # extract uploader & filename from url
        uploader = self._to_unicode(mobj.group(1))
        file_id = uploader + u'-' + self._to_unicode(mobj.group(2))

        # Construct the API request from the matched groups, so that it does
        # not depend on a trailing slash or anything following the path.
        api_url = ('http://www.mixcloud.com/api/1/cloudcast/'
                   + mobj.group(1) + '/' + mobj.group(2) + '.json')
        request = compat_urllib_request.Request(api_url)

        # retrieve .json file with links to files
        self.report_download_json(api_url)
        try:
            response = compat_urllib_request.urlopen(request)
            try:
                raw_json = response.read()
            finally:
                response.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))

        # parse JSON (decoded first: older Python 3 json.loads rejects bytes)
        try:
            json_data = json.loads(self._to_unicode(raw_json))
        except ValueError as err:
            raise ExtractorError(u'Unable to parse JSON: %s' % compat_str(err))
        formats = dict(json_data['audio_formats'])

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        req_format = self._downloader.params.get('format', None)

        media_url = None
        format_param = None
        if req_format is None or req_format == 'best':
            # Take the first format that has a reachable URL.
            for candidate in formats:
                media_url = self.check_urls(self.get_urls(formats, candidate))
                if media_url is not None:
                    format_param = candidate
                    break  # got it!
        else:
            if req_format not in formats:
                raise ExtractorError(u'Format is not available')
            media_url = self.check_urls(self.get_urls(formats, req_format))
            format_param = req_format

        if media_url is None:
            raise ExtractorError(u'Unable to find a working media URL')

        media_url = self._to_unicode(media_url)
        return [{
            'id': file_id,
            'url': media_url,
            'uploader': uploader,
            'upload_date': None,
            'title': json_data['name'],
            'ext': media_url.split(u'.')[-1],
            'format': u'NA' if format_param is None else self._to_unicode(format_param),
            # Optional metadata: a missing key yields None instead of KeyError.
            'thumbnail': json_data.get('thumbnail_url'),
            'description': json_data.get('description'),
            'player_url': self._to_unicode(json_data.get('player_swf_url')),
        }]