]>
Commit | Line | Data |
---|---|---|
# coding: utf-8
from __future__ import unicode_literals

import re

from ..compat import compat_urlparse
from .common import InfoExtractor
from ..utils import parse_duration
9 | ||
10 | ||
class JamendoBaseIE(InfoExtractor):
    """Base class holding the title parsing shared by the Jamendo extractors."""

    def _extract_meta(self, webpage, fatal=True):
        """Return ``(title, first, second)`` parsed from a Jamendo page.

        The title is taken from the Open Graph title or, failing that, the
        ``<title>`` element, and must carry the ``| Jamendo Music`` suffix
        (which is stripped).  If that yields nothing, the ``name`` meta tag
        is used instead.

        ``first`` and ``second`` are the two halves of the title around its
        last ``' - '`` separator (the first group is greedy); both are
        ``None`` when the title is missing or has no separator.

        :param webpage: HTML of the page as a string.
        :param fatal: forwarded to the ``name`` meta lookup; presumably makes
            a missing title raise instead of returning ``None`` -- confirm
            against ``InfoExtractor._html_search_meta``.
        """
        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            r'<title>([^<]+)', webpage,
            'title', default=None)
        if title:
            # Drop the site suffix; a candidate without it becomes None so
            # that the meta-tag fallback below takes over.
            title = self._search_regex(
                r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'name', webpage, 'title', fatal=fatal)
        # title may still be None here when fatal is false.
        mobj = re.search(r'(.+) - (.+)', title or '')
        artist, second = mobj.groups() if mobj else [None] * 2
        return title, artist, second
26 | ||
27 | ||
class JamendoIE(JamendoBaseIE):
    """Extractor for single Jamendo track pages (``/track/<id>/<slug>``)."""

    _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
        'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
        'info_dict': {
            'id': '196219',
            'display_id': 'stories-from-emona-i',
            'ext': 'flac',
            'title': 'Maya Filipič - Stories from Emona I',
            'artist': 'Maya Filipič',
            'track': 'Stories from Emona I',
            'duration': 210,
            'thumbnail': r're:^https?://.*\.jpg'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for one track.

        Downloads the track page for its metadata (title, artist, track
        name, thumbnail, duration) and derives the four audio format URLs
        from the numeric track id found in ``url``.
        """
        # Match against _VALID_URL directly: _VALID_URL_RE is not defined in
        # this file and is presumably only set by the base class as a side
        # effect of an earlier call -- NOTE(review): confirm in common.py.
        mobj = re.match(self._VALID_URL, url)
        track_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        title, artist, track = self._extract_meta(webpage)

        # The position in this table is used as the 'quality' value, so the
        # order of the rows matters.
        formats = [{
            'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
                   % (sub_domain, track_id, format_id),
            'format_id': format_id,
            'ext': ext,
            'quality': quality,
        } for quality, (format_id, sub_domain, ext) in enumerate((
            ('mp31', 'mp3l', 'mp3'),
            ('mp32', 'mp3d', 'mp3'),
            ('ogg1', 'ogg', 'ogg'),
            ('flac', 'flac', 'flac'),
        ))]
        self._sort_formats(formats)

        # Both lookups are non-fatal: a page without them still yields a
        # usable result.
        thumbnail = self._html_search_meta(
            'image', webpage, 'thumbnail', fatal=False)
        duration = parse_duration(self._search_regex(
            r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
            webpage, 'duration', fatal=False))

        return {
            'id': track_id,
            'display_id': display_id,
            'thumbnail': thumbnail,
            'title': title,
            'duration': duration,
            'artist': artist,
            'track': track,
            'formats': formats
        }
84 | ||
85 | ||
class JamendoAlbumIE(JamendoBaseIE):
    """Extractor for Jamendo album pages (``/album/<id>/<slug>``)."""

    _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
    _TEST = {
        'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
        'info_dict': {
            'id': '121486',
            'title': 'Shearer - Duck On Cover'
        },
        'playlist': [{
            'md5': 'e1a2fcb42bda30dfac990212924149a8',
            'info_dict': {
                'id': '1032333',
                'ext': 'flac',
                'title': 'Shearer - Warmachine',
                'artist': 'Shearer',
                'track': 'Warmachine',
            }
        }, {
            'md5': '1f358d7b2f98edfe90fd55dac0799d50',
            'info_dict': {
                'id': '1032330',
                'ext': 'flac',
                'title': 'Shearer - Without Your Ghost',
                'artist': 'Shearer',
                'track': 'Without Your Ghost',
            }
        }],
        'params': {
            'playlistend': 2
        }
    }

    def _real_extract(self, url):
        """Return a playlist with one ``url_transparent`` entry per track link.

        Each entry points at the track page (handled by ``JamendoIE``) and
        carries the artist and album name parsed from the album page title.
        """
        # Match against _VALID_URL directly: _VALID_URL_RE is not defined in
        # this file and is presumably only set by the base class as a side
        # effect of an earlier call -- NOTE(review): confirm in common.py.
        mobj = re.match(self._VALID_URL, url)
        album_id = mobj.group('id')

        webpage = self._download_webpage(url, mobj.group('display_id'))

        # Non-fatal: title, artist and album may all be None.
        title, artist, album = self._extract_meta(webpage, fatal=False)

        entries = []
        # Track rows are anchors whose class contains
        # 'js-trackrow-albumpage-link'; the href (either quote style) is
        # captured as 'path'.
        for m in re.finditer(
                r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
                webpage):
            path = m.group('path')
            entries.append({
                '_type': 'url_transparent',
                'url': compat_urlparse.urljoin(url, path),
                'ie_key': JamendoIE.ie_key(),
                # None when the link does not contain a numeric track id.
                'id': self._search_regex(
                    r'/track/(\d+)', path, 'track id', default=None),
                'artist': artist,
                'album': album,
            })

        return self.playlist_result(entries, album_id, title)