]>
Commit | Line | Data |
---|---|---|
423817c4 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | int_or_none, | |
8 | unified_strdate, | |
9 | ) | |
10 | ||
11 | ||
class ExpoTVIE(InfoExtractor):
    """Extractor for videos served under www.expotv.com/videos/."""

    # The numeric video id is the last path component, optionally followed
    # by a query string or fragment.
    _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
        'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
        'info_dict': {
            'id': '17561',
            'ext': 'mp4',
            'upload_date': '20060212',
            'title': 'My Favorite Online Scrapbook Store',
            'view_count': int,
            'description': 'You\'ll find most everything you need at this virtual store front.',
            'uploader': 'Anna T.',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video at ``url``.

        Downloads the video page, reads the player key from it, fetches
        the JSON player configuration and turns each entry of its
        ``sources`` list into one or more formats. Title and description
        come from the page's Open Graph data; view count, uploader and
        upload date are scraped from the page markup on a best-effort
        basis.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``description``, ``view_count``, ``thumbnail``, ``uploader`` and
        ``upload_date``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The player key is required to address the configuration endpoint,
        # so this lookup is left fatal.
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config = self._download_json(
            'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key),
            video_id, 'Downloading video configuration')

        formats = []
        for fcfg in config['sources']:
            media_url = fcfg.get('file')
            if not media_url:
                # A source entry without a file URL cannot be downloaded.
                continue
            if fcfg.get('type') == 'm3u8':
                # HLS manifest: let the shared helper expand it into formats.
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
            else:
                formats.append({
                    'url': media_url,
                    'height': int_or_none(fcfg.get('height')),
                    'format_id': fcfg.get('label'),
                    # Prefer the extension embedded in the URL's "filename="
                    # parameter; fall back to the declared source type.
                    'ext': self._search_regex(
                        r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
                        'file extension', default=None) or fcfg.get('type'),
                })
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')
        # The play counter is optional metadata: like uploader and upload
        # date below, its absence must not abort the whole extraction.
        view_count = int_or_none(self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts',
            fatal=False))
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }