]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/deezer.py
[DeezerPlaylist] Update to new website
[yt-dlp.git] / youtube_dl / extractor / deezer.py
CommitLineData
da2e1f53
PH
1from __future__ import unicode_literals
2
3import json
4import re
5
6from .common import InfoExtractor
7from ..utils import (
6539c91a 8 ExtractorError,
da2e1f53
PH
9 int_or_none,
10 orderedSet,
11)
12
13
class DeezerPlaylistIE(InfoExtractor):
    """Extract the tracks of a Deezer playlist page.

    Only the preview stream referenced by the page data is exposed for each
    track (see the warning emitted in ``_real_extract``).
    """

    _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.deezer.com/playlist/176747451',
        'info_dict': {
            'id': '176747451',
            'title': 'Best!',
            'uploader': 'anonymous',
            'thumbnail': r're:^https?://e-cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
        },
        'playlist_count': 29,
    }

    def _real_extract(self, url):
        """Return a playlist info dict for the Deezer playlist at *url*.

        Raises ExtractorError (expected) when the page carries a
        ``soon-txt`` notice, which is reported as a geoblocking message.
        Playlist-level title, uploader and thumbnail are treated as optional
        metadata: when absent they are returned as None instead of aborting
        the extraction. Tracks without a preview URL are skipped with a
        warning.
        """
        if 'test' not in self._downloader.params:
            self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')

        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)
        geoblocking_msg = self._html_search_regex(
            r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
            default=None)
        if geoblocking_msg is not None:
            raise ExtractorError(
                'Deezer said: %s' % geoblocking_msg, expected=True)

        # Two page layouts are tried in order: the __DZR_APP_STATE__
        # assignment first, then the naboo.display(...) call.
        data_json = self._search_regex(
            (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
             r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
            webpage, 'data JSON')
        data = json.loads(data_json)

        # Playlist-level metadata is optional; tolerate a missing section
        # or missing keys rather than failing the whole playlist.
        playlist_data = data.get('DATA') or {}
        playlist_title = playlist_data.get('TITLE')
        playlist_uploader = playlist_data.get('PARENT_USERNAME')
        # fatal=False: a missing cover image must not abort extraction.
        playlist_thumbnail = self._search_regex(
            r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
            'playlist thumbnail', fatal=False)

        entries = []
        for s in data['SONGS']['data']:
            # The preview URL lives in the first MEDIA item; a track may
            # come without any media, in which case it cannot be played.
            media = s.get('MEDIA') or []
            preview_url = media[0].get('HREF') if media else None
            if not preview_url:
                self._downloader.report_warning(
                    'Skipping track %s: no preview available' % s['SNG_ID'])
                continue

            formats = [{
                'format_id': 'preview',
                'url': preview_url,
                'preference': -100,  # Only the first 30 seconds
                'ext': 'mp3',
            }]
            self._sort_formats(formats)
            # De-duplicate artist names while keeping their original order.
            artists = ', '.join(
                orderedSet(a['ART_NAME'] for a in s['ARTISTS']))
            entries.append({
                'id': s['SNG_ID'],
                'duration': int_or_none(s.get('DURATION')),
                'title': '%s - %s' % (artists, s['SNG_TITLE']),
                'uploader': s.get('ART_NAME'),
                'uploader_id': s.get('ART_ID'),
                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
                'formats': formats,
            })

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'uploader': playlist_uploader,
            'thumbnail': playlist_thumbnail,
            'entries': entries,
        }