]>
Commit | Line | Data |
---|---|---|
da2e1f53 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import json | |
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
6539c91a | 8 | ExtractorError, |
da2e1f53 PH |
9 | int_or_none, |
10 | orderedSet, | |
11 | ) | |
12 | ||
class DeezerBaseInfoExtractor(InfoExtractor):
    """Shared page-download and JSON-extraction logic for Deezer extractors."""

    def get_data(self, url):
        """Download the page at *url* and pull out its embedded JSON data.

        Returns a ``(id, webpage, data)`` tuple: the ``id`` group matched by
        ``self._VALID_URL``, the downloaded page, and the decoded JSON object.

        Raises ExtractorError (expected) when the page carries a
        ``soon-txt`` paragraph, whose text is relayed to the user.
        """
        # Outside of test runs, tell the user that only previews are served.
        if 'test' not in self._downloader.params:
            self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')

        data_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, data_id)

        # A "soon-txt" paragraph means Deezer refused to serve the content;
        # surface its message instead of failing later on the JSON search.
        geoblocking_msg = self._html_search_regex(
            r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
            default=None)
        if geoblocking_msg is not None:
            raise ExtractorError(
                'Deezer said: %s' % geoblocking_msg, expected=True)

        # Candidate patterns for the embedded data, handed to _search_regex
        # together as a tuple.
        data_patterns = (
            r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
            r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n',
        )
        data = json.loads(
            self._search_regex(data_patterns, webpage, 'data JSON'))
        return data_id, webpage, data
36 | ||
class DeezerPlaylistIE(DeezerBaseInfoExtractor):
    """Extractor for Deezer playlist pages (preview streams only)."""

    _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.deezer.com/playlist/176747451',
        'info_dict': {
            'id': '176747451',
            'title': 'Best!',
            'uploader': 'anonymous',
            'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
        },
        'playlist_count': 29,
    }

    def _real_extract(self, url):
        """Build a playlist result with one entry per song on the page.

        Returns a ``playlist`` info dict. Playlist-level title, uploader and
        thumbnail are optional: when absent they come back as ``None``
        rather than aborting the extraction.
        """
        playlist_id, webpage, data = self.get_data(url)

        # Playlist-level metadata is optional, so a missing 'DATA' object or
        # thumbnail tag must not prevent the tracks from being extracted.
        playlist_info = data.get('DATA') or {}
        playlist_title = playlist_info.get('TITLE')
        playlist_uploader = playlist_info.get('PARENT_USERNAME')
        playlist_thumbnail = self._search_regex(
            r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
            'playlist thumbnail', fatal=False)

        entries = []
        for s in data.get('SONGS').get('data'):
            formats = [{
                'format_id': 'preview',
                'url': s.get('MEDIA')[0].get('HREF'),
                'preference': -100,  # Only the first 30 seconds
                'ext': 'mp3',
            }]
            self._sort_formats(formats)
            # orderedSet drops repeated artist names while keeping their order.
            artists = ', '.join(
                orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
            entries.append({
                'id': s.get('SNG_ID'),
                'duration': int_or_none(s.get('DURATION')),
                'title': '%s - %s' % (artists, s.get('SNG_TITLE')),
                'uploader': s.get('ART_NAME'),
                'uploader_id': s.get('ART_ID'),
                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
                'formats': formats,
            })

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'uploader': playlist_uploader,
            'thumbnail': playlist_thumbnail,
            'entries': entries,
        }