]>
Commit | Line | Data |
---|---|---|
b4a186b7 JMF |
1 | from __future__ import unicode_literals |
2 | ||
28ef06f7 | 3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
b4a186b7 | 8 | RegexNotFoundError, |
28ef06f7 | 9 | unescapeHTML, |
10 | ) | |
11 | ||
b4a186b7 | 12 | |
class JukeboxIE(InfoExtractor):
    """Extractor for video pages matching ``_VALID_URL`` (jukebox sites).

    The video page embeds an iframe; the iframe page carries a JSON-like
    ``"config":{"file":"..."}`` entry that points either at a direct media
    URL (with an ``mdtk`` query token) or at a YouTube watch URL.
    """

    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<id>[a-z0-9\-]+)\.html'
    _TEST = {
        'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
        'info_dict': {
            'id': 'r303r',
            'ext': 'flv',
            'title': 'Kosheen-En Vivo Pride',
            'uploader': 'Kosheen',
        },
    }

    def _real_extract(self, url):
        """Resolve a jukebox page URL to its media information.

        Args:
            url: Page URL; the video id is taken from it via ``_match_id``.

        Returns:
            A dict with ``id``, ``url``, ``title`` and ``uploader`` when the
            iframe exposes a direct media URL, otherwise the result of
            ``url_result`` delegating to the ``Youtube`` extractor.

        Raises:
            ExtractorError: if the iframe page contains the
                ``jkb_waiting`` marker (video not available).
            RegexNotFoundError: if neither a direct media URL nor a YouTube
                URL is found in the iframe page.
        """
        video_id = self._match_id(url)

        html = self._download_webpage(url, video_id)
        iframe_url = unescapeHTML(self._search_regex(
            r'<iframe .*src="([^"]*)"', html, 'iframe url'))

        iframe_html = self._download_webpage(
            iframe_url, video_id, 'Downloading iframe')
        if re.search(r'class="jkb_waiting"', iframe_html):
            # expected=True: this is a site-side availability condition, not
            # an extractor bug, so it should not prompt a bug report.
            raise ExtractorError(
                'Video is not available(in your country?)!', expected=True)

        self.report_extraction(video_id)

        # Only the regex lookup can raise RegexNotFoundError, so it is the
        # only statement kept inside the try body.
        try:
            video_url = self._search_regex(
                r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
                iframe_html, 'video url')
        except RegexNotFoundError:
            # No direct media URL: fall back to an embedded YouTube video.
            youtube_url = self._search_regex(
                r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
                iframe_html, 'youtube url')
            # The URL is JSON-escaped in the page ("\/"); restore plain "/".
            youtube_url = unescapeHTML(youtube_url).replace('\\/', '/')
            self.to_screen('Youtube video detected')
            return self.url_result(youtube_url, ie='Youtube')

        # Same un-escaping as above for the direct media URL.
        video_url = unescapeHTML(video_url).replace('\\/', '/')

        title = self._html_search_regex(
            r'<h1 class="inline">([^<]+)</h1>', html, 'title')
        artist = self._html_search_regex(
            r'<span id="infos_article_artist">([^<]+)</span>', html, 'artist')

        return {
            'id': video_id,
            'url': video_url,
            'title': artist + '-' + title,
            'uploader': artist,
        }