]>
Commit | Line | Data |
---|---|---|
7a5e7b30 S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
8604e882 PH |
6 | from ..utils import ( |
7 | int_or_none, | |
8 | qualities, | |
9 | ) | |
7a5e7b30 S |
10 | |
11 | ||
class UbuIE(InfoExtractor):
    """Information extractor for film pages of the form ubu.com/film/<id>.html."""

    # Accept both http and https links, with or without the "www." prefix.
    # The named group "id" is the page slug (lowercase letters, digits,
    # "_" and "-") and is used as the video id.
    _VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
    _TEST = {
        'url': 'http://ubu.com/film/her_noise.html',
        'md5': '138d5652618bf0f03878978db9bef1ee',
        'info_dict': {
            'id': 'her_noise',
            'ext': 'm4v',
            'title': 'Her Noise - The Making Of (2007)',
            'duration': 3600,
        },
    }

    def _real_extract(self, url):
        """Download the film page at *url* and build its info dict.

        Returns a dict with the keys 'id', 'title', 'duration' and
        'formats'.  'duration' is None when the page does not state one,
        because that lookup is performed with fatal=False.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The pattern is applied to the page markup, where the ampersand may
        # be written either literally or as the "&amp;" entity; accept both.
        title = self._html_search_regex(
            r'<title>.+?Film &(?:amp;)? Video: ([^<]+)</title>',
            webpage, 'title')

        # The page states the running time in whole minutes; invscale=60
        # scales that value by 60 (the test above expects 3600).
        duration = int_or_none(
            self._html_search_regex(
                r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
            invscale=60)

        # (format_id, regex) pairs; group 1 of each regex is the media URL.
        # NOTE(review): the list order is handed to qualities(), presumably
        # ranking later entries higher - confirm against utils.qualities.
        FORMAT_REGEXES = [
            ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
            ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
        ]
        preference = qualities([fid for fid, _ in FORMAT_REGEXES])

        formats = []
        for format_id, format_regex in FORMAT_REGEXES:
            m = re.search(format_regex, webpage)
            if not m:
                # This variant is not offered on the page; try the next one.
                continue
            formats.append({
                'url': m.group(1),
                'format_id': format_id,
                'preference': preference(format_id),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }