]>
Commit | Line | Data |
---|---|---|
1 | import base64 | |
2 | import re | |
3 | import urllib.parse | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | ||
class BigflixIE(InfoExtractor):
    """Extractor for video pages on bigflix.com.

    The numeric trailing path component of the page URL is used as the
    video id.  Format URLs are read from the page markup, where they appear
    as URL-quoted, base64-encoded parameter values.
    """

    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for it.

        Returns a dict with the keys ``id``, ``title``, ``description`` and
        ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            """Percent-unquote, base64-decode and UTF-8-decode a URL value."""
            return base64.b64decode(urllib.parse.unquote(
                quoted_b64_url)).decode('utf-8')

        formats = []
        # Each ``ContentURL_<height>p...=<value>`` parameter yields one
        # format; the 3-4 digit number in the name is used as its height.
        for height, encoded_url in re.findall(
                r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
            video_url = decode_url(encoded_url)
            f = {
                'url': video_url,
                'format_id': f'{height}p',
                'height': int(height),
            }
            # RTMP URLs are explicitly labelled with the flv extension.
            if video_url.startswith('rtmp'):
                f['ext'] = 'flv'
            formats.append(f)

        # A ``file=<value>`` parameter may carry one more URL; add it only
        # if it is not already among the formats collected above.
        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            video_url = decode_url(file_url)
            if all(f['url'] != video_url for f in formats):
                # Reuse the decoded value instead of decoding a second time.
                formats.append({
                    'url': video_url,
                })

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }