]>
Commit | Line | Data |
---|---|---|
423817c4 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | int_or_none, | |
8 | unified_strdate, | |
9 | ) | |
10 | ||
11 | ||
class ExpoTVIE(InfoExtractor):
    """Information extractor for videos hosted on www.expotv.com."""

    # The numeric video id is the last path component of the URL.
    _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
        'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
        'info_dict': {
            'id': '17561',
            'ext': 'mp4',
            'upload_date': '20060212',
            'title': 'My Favorite Online Scrapbook Store',
            'view_count': int,
            'description': 'You\'ll find most everything you need at this virtual store front.',
            'uploader': 'Anna T.',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract the formats and metadata for a single ExpoTV video.

        The video page is downloaded to obtain the ``playerKey`` parameter,
        which together with the video id addresses a JSON configuration
        document listing the available sources and the preview image.
        The remaining metadata is scraped from the video page itself.

        :param url: a URL matching ``_VALID_URL``.
        :return: an info dict with ``id``, ``formats``, ``title``,
            ``description``, ``view_count``, ``thumbnail``, ``uploader``
            and ``upload_date``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The player key is mandatory: without it the configuration URL
        # cannot be built, so this lookup stays fatal.
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config_url = 'http://client.expotv.com/video/config/%s/%s' % (
            video_id, player_key)
        config = self._download_json(
            config_url, video_id,
            note='Downloading video configuration')

        formats = [{
            'url': fcfg['file'],
            'height': int_or_none(fcfg.get('height')),
            'format_note': fcfg.get('label'),
            # The extension is taken from the "filename=" query parameter
            # of the source URL; it is left unset when not present.
            'ext': self._search_regex(
                r'filename=.*\.([a-z0-9_A-Z]+)&', fcfg['file'],
                'file extension', default=None),
        } for fcfg in config['sources']]
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')
        # The play counter is optional metadata; a page without it must not
        # abort the extraction of an otherwise playable video.
        view_count = int_or_none(self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts',
            fatal=False))
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }