]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
efb1bb90 JMF |
2 | from __future__ import unicode_literals |
3 | ||
2563bcc8 | 4 | import re |
2563bcc8 JMF |
5 | |
6 | from .common import InfoExtractor | |
6b820a23 | 7 | from ..utils import ( |
8 | ExtractorError, | |
9 | int_or_none, | |
10 | parse_iso8601, | |
2563bcc8 JMF |
11 | ) |
12 | ||
13 | ||
class MySpaceIE(InfoExtractor):
    """Extractor for single MySpace video pages and single song pages."""

    # Either the 'video_id' or the 'song_id' group is set, depending on
    # which alternative of 'mediatype' matched.
    _VALID_URL = r'''(?x)
                    https?://
                        myspace\.com/[^/]+/
                        (?P<mediatype>
                            video/[^/]+/(?P<video_id>\d+)|
                            music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$)
                        )
                    '''

    _TESTS = [{
        'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
        'md5': '9c1483c106f4a695c47d2911feed50a7',
        'info_dict': {
            'id': '109594919',
            'ext': 'mp4',
            'title': 'Little Big Town',
            'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
            'uploader': 'Five Minutes to the Stage',
            'uploader_id': 'fiveminutestothestage',
            'timestamp': 1414108751,
            'upload_date': '20141023',
        },
    }, {
        # songs
        'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
        'md5': '1d7ee4604a3da226dd69a123f748b262',
        'info_dict': {
            'id': '93388656',
            'ext': 'm4a',
            'title': 'Of weakened soul...',
            'uploader': 'Killsorrow',
            'uploader_id': 'killsorrow',
        },
    }, {
        'add_ie': ['Youtube'],
        'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
        'info_dict': {
            'id': 'xqds0B_meys',
            'ext': 'webm',
            'title': 'Three Days Grace - Animal I Have Become',
            'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
            'uploader': 'ThreeDaysGraceVEVO',
            'uploader_id': 'ThreeDaysGraceVEVO',
            'upload_date': '20091002',
        },
    }, {
        'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
        'only_matching': True,
    }, {
        'url': 'https://myspace.com/thelargemouthbassband/music/song/02-pure-eyes.mp3-94422330-105113388',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a single MySpace video or song.

        Returns an info dict with the collected formats, a url_result
        pointing at the Vevo or Youtube extractor when a song page offers
        no stream of its own, or None (after a warning) when the page holds
        no markup for the requested song.

        Raises ExtractorError when the song markup is found but provides
        neither a stream URL nor a Vevo/Youtube id.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id') or mobj.group('song_id')
        is_song = mobj.group('mediatype').startswith('music/song')
        webpage = self._download_webpage(url, video_id)
        # Only needed for the RTMP format; a missing player URL is not fatal.
        player_url = self._search_regex(
            r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False)

        def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None):
            """Build the format list from whichever stream URLs are set."""
            formats = []
            # Songs are audio only, so mark them as carrying no video codec.
            vcodec = 'none' if is_song else None
            if hls_stream_url:
                formats.append({
                    'format_id': 'hls',
                    'url': hls_stream_url,
                    'protocol': 'm3u8_native',
                    'ext': 'm4a' if is_song else 'mp4',
                    'vcodec': vcodec,
                })
            if stream_url and player_url:
                # The RTMP stream is given as '<rtmp url>;<play path>'.
                # partition() instead of an unpacked split() so that a value
                # without the ';' separator merely skips the RTMP format
                # rather than raising ValueError and losing the other ones.
                rtmp_url, separator, play_path = stream_url.partition(';')
                if separator:
                    formats.append({
                        'format_id': 'rtmp',
                        'url': rtmp_url,
                        'play_path': play_path,
                        'player_url': player_url,
                        'protocol': 'rtmp',
                        'ext': 'flv',
                        'width': width,
                        'height': height,
                        'vcodec': vcodec,
                    })
            if http_stream_url:
                formats.append({
                    'format_id': 'http',
                    'url': http_stream_url,
                    'width': width,
                    'height': height,
                    'vcodec': vcodec,
                })
            return formats

        if is_song:
            # songs don't store any useful info in the 'context' variable
            song_data = self._search_regex(
                r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
                webpage, 'song_data', default=None, group=0)
            if song_data is None:
                # some songs in an album are not playable
                self.report_warning(
                    '%s: No downloadable song on this page' % video_id)
                return

            def search_data(name):
                """Return the data-<name> attribute of the song markup, or ''."""
                return self._search_regex(
                    r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
                    song_data, name, default='', group='data')
            formats = formats_from_stream_urls(
                search_data('stream-url'), search_data('hls-stream-url'),
                search_data('http-stream-url'))
            if not formats:
                # No stream of its own: hand over to the extractor of the
                # site the song is embedded from, if one is referenced.
                vevo_id = search_data('vevo-id')
                youtube_id = search_data('youtube-id')
                if vevo_id:
                    self.to_screen('Vevo video detected: %s' % vevo_id)
                    return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
                elif youtube_id:
                    self.to_screen('Youtube video detected: %s' % youtube_id)
                    return self.url_result(youtube_id, ie='Youtube')
                else:
                    raise ExtractorError(
                        'Found song but don\'t know how to download it')
            self._sort_formats(formats)
            return {
                'id': video_id,
                'title': self._og_search_title(webpage),
                'uploader': search_data('artist-name'),
                'uploader_id': search_data('artist-username'),
                'thumbnail': self._og_search_thumbnail(webpage),
                'duration': int_or_none(search_data('duration')),
                'formats': formats,
            }
        else:
            # Videos carry their metadata in the JSON assigned to 'context'.
            video = self._parse_json(self._search_regex(
                r'context = ({.*?});', webpage, 'context'),
                video_id)['video']
            formats = formats_from_stream_urls(
                video.get('streamUrl'), video.get('hlsStreamUrl'),
                video.get('mp4StreamUrl'), int_or_none(video.get('width')),
                int_or_none(video.get('height')))
            self._sort_formats(formats)
            return {
                'id': video_id,
                'title': video['title'],
                'description': video.get('description'),
                'thumbnail': video.get('imageUrl'),
                'uploader': video.get('artistName'),
                'uploader_id': video.get('artistUsername'),
                'duration': int_or_none(video.get('duration')),
                'timestamp': parse_iso8601(video.get('dateAdded')),
                'formats': formats,
            }
170 | ||
95c673a1 TF |
171 | |
class MySpaceAlbumIE(InfoExtractor):
    """Extractor that turns a MySpace album page into a playlist of songs."""

    IE_NAME = 'MySpace:album'
    _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
        'info_dict': {
            'title': 'Transmissions',
            'id': '19455773',
        },
        'playlist_count': 14,
        'skip': 'this album is only available in some countries',
    }, {
        'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
        'info_dict': {
            'title': 'The Demo',
            'id': '18596029',
        },
        'playlist_count': 5,
    }]

    def _real_extract(self, url):
        """Return a playlist dict with one entry per song listed on the page.

        Raises an expected ExtractorError when the page lists no songs.
        """
        url_match = re.match(self._VALID_URL, url)
        album_id = url_match.group('id')
        # The 'title' group keeps its trailing dash, so this rebuilds the
        # '<slug>-<id>' part of the URL.
        album_display_id = url_match.group('title') + album_id
        page = self._download_webpage(url, album_display_id)

        # Each song of the album is advertised through a "music:song" entry.
        song_urls = re.findall(r'"music:song" content="(.*?)"', page)
        if not song_urls:
            raise ExtractorError(
                '%s: No songs found, try using proxy' % album_display_id,
                expected=True)

        # Every song is delegated to the single-item MySpace extractor.
        song_ie_key = MySpaceIE.ie_key()
        entries = []
        for song_url in song_urls:
            entries.append(self.url_result(song_url, ie=song_ie_key))

        playlist_title = self._og_search_title(page)
        return {
            '_type': 'playlist',
            'id': album_id,
            'display_id': album_display_id,
            'title': playlist_title,
            'entries': entries,
        }