]>
Commit | Line | Data |
---|---|---|
da2e1f53 | 1 | import json |
da2e1f53 PH |
2 | |
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
6539c91a | 5 | ExtractorError, |
da2e1f53 PH |
6 | int_or_none, |
7 | orderedSet, | |
8 | ) | |
9 | ||
da2e1f53 | 10 | |
class DeezerBaseInfoExtractor(InfoExtractor):
    """Shared page-scraping logic for the Deezer extractors."""

    def get_data(self, url):
        """Download a Deezer page and decode the JSON state embedded in it.

        Returns a ``(data_id, webpage, data)`` tuple: the ID matched from
        *url*, the downloaded HTML, and the decoded JSON object.

        Raises ExtractorError when the page carries a "soon-txt" notice
        (reported as Deezer's own message), or when the JSON blob cannot be
        located or parsed.
        """
        if not self.get_param('test'):
            self.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')

        data_id = self._match_id(url)
        webpage = self._download_webpage(url, data_id)

        geoblocking_msg = self._html_search_regex(
            r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
            default=None)
        if geoblocking_msg is not None:
            raise ExtractorError(
                'Deezer said: %s' % geoblocking_msg, expected=True)

        # Two page layouts are supported: the newer __DZR_APP_STATE__
        # assignment and the older naboo.display(...) call.
        data_json = self._search_regex(
            (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
             r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
            webpage, 'data JSON')
        # _parse_json turns malformed JSON into an ExtractorError instead of
        # letting a bare ValueError escape from json.loads.
        data = self._parse_json(data_json, data_id)
        return data_id, webpage, data
480f2d89 | 33 | |
4d3c8222 | 34 | |
480f2d89 L |
class DeezerPlaylistIE(DeezerBaseInfoExtractor):
    """Extractor for Deezer playlist pages (30-second track previews)."""

    _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.deezer.com/playlist/176747451',
        'info_dict': {
            'id': '176747451',
            'title': 'Best!',
            'uploader': 'anonymous',
            'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
        },
        'playlist_count': 29,
    }

    def _real_extract(self, url):
        """Build a playlist result with one preview entry per listed song."""
        playlist_id, webpage, data = self.get_data(url)

        playlist_info = data.get('DATA') or {}
        playlist_title = playlist_info.get('TITLE')
        playlist_uploader = playlist_info.get('PARENT_USERNAME')
        # The thumbnail is optional metadata; a missing <img> must not abort
        # extraction of the whole playlist.
        playlist_thumbnail = self._search_regex(
            r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
            'playlist thumbnail', default=None)

        entries = []
        # 'SONGS' or its 'data' list may be absent or null; treat as empty.
        for s in (data.get('SONGS') or {}).get('data') or []:
            # Guard against a missing, null or empty MEDIA list.
            media = s.get('MEDIA') or [{}]
            formats = [{
                'format_id': 'preview',
                'url': media[0].get('HREF'),
                'preference': -100,  # Only the first 30 seconds
                'ext': 'mp3',
            }]
            self._sort_formats(formats)

            # Skip artists without a name so str.join never sees None, and
            # fall back to the track-level artist when the list is unusable.
            artists = ', '.join(orderedSet(
                a['ART_NAME'] for a in s.get('ARTISTS') or []
                if a.get('ART_NAME'))) or s.get('ART_NAME')
            song_title = s.get('SNG_TITLE')
            entries.append({
                'id': s.get('SNG_ID'),
                'duration': int_or_none(s.get('DURATION')),
                'title': '%s - %s' % (artists, song_title) if artists else song_title,
                'uploader': s.get('ART_NAME'),
                'uploader_id': s.get('ART_ID'),
                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
                'formats': formats,
            })

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'uploader': playlist_uploader,
            'thumbnail': playlist_thumbnail,
            'entries': entries,
        }
e187464c | 86 | |
4d3c8222 | 87 | |
e187464c L |
class DeezerAlbumIE(DeezerBaseInfoExtractor):
    """Extractor for Deezer album pages (30-second track previews)."""

    _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?album/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.deezer.com/fr/album/67505622',
        'info_dict': {
            'id': '67505622',
            'title': 'Last Week',
            'uploader': 'Home Brew',
            'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
        },
        'playlist_count': 7,
    }

    def _real_extract(self, url):
        """Build a playlist result with one preview entry per album track."""
        album_id, webpage, data = self.get_data(url)

        album_info = data.get('DATA') or {}
        album_title = album_info.get('ALB_TITLE')
        album_uploader = album_info.get('ART_NAME')
        # The thumbnail is optional metadata; a missing <img> must not abort
        # extraction of the whole album.
        album_thumbnail = self._search_regex(
            r'<img id="naboo_album_image".*?src="([^"]+)"', webpage,
            'album thumbnail', default=None)

        entries = []
        # 'SONGS' or its 'data' list may be absent or null; treat as empty.
        for s in (data.get('SONGS') or {}).get('data') or []:
            # Guard against a missing, null or empty MEDIA list.
            media = s.get('MEDIA') or [{}]
            formats = [{
                'format_id': 'preview',
                'url': media[0].get('HREF'),
                'preference': -100,  # Only the first 30 seconds
                'ext': 'mp3',
            }]
            self._sort_formats(formats)

            # Skip artists without a name so str.join never sees None, and
            # fall back to the track-level artist when the list is unusable.
            artists = ', '.join(orderedSet(
                a['ART_NAME'] for a in s.get('ARTISTS') or []
                if a.get('ART_NAME'))) or s.get('ART_NAME')
            song_title = s.get('SNG_TITLE')
            entries.append({
                'id': s.get('SNG_ID'),
                'duration': int_or_none(s.get('DURATION')),
                'title': '%s - %s' % (artists, song_title) if artists else song_title,
                'uploader': s.get('ART_NAME'),
                'uploader_id': s.get('ART_ID'),
                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
                'formats': formats,
                'track': song_title,
                'track_number': int_or_none(s.get('TRACK_NUMBER')),
                'track_id': s.get('SNG_ID'),
                'artist': album_uploader,
                'album': album_title,
                'album_artist': album_uploader,
            })

        return {
            '_type': 'playlist',
            'id': album_id,
            'title': album_title,
            'uploader': album_uploader,
            'thumbnail': album_thumbnail,
            'entries': entries,
        }
4d3c8222 | 144 | } |