]>
Commit | Line | Data |
---|---|---|
146323a7 PH |
1 | from .common import InfoExtractor |
2 | ||
3 | ||
class GrouponIE(InfoExtractor):
    """Extractor for Groupon deal pages.

    A deal page is returned as a playlist whose entries are the videos
    listed in the page's embedded ``payload`` JSON; each entry is delegated
    to the extractor registered for its provider in ``_PROVIDERS``.
    """

    _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'

    _TEST = {
        'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
        'info_dict': {
            'id': 'bikram-yoga-huntington-beach-2',
            'title': '$49 for 10 Yoga Classes or One Month of Unlimited Classes at Bikram Yoga Huntington Beach ($180 Value)',
            'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
        },
        'playlist': [{
            'md5': '42428ce8a00585f9bc36e49226eae7a1',
            'info_dict': {
                'id': 'fk6OhWpXgIQ',
                'ext': 'mp4',
                'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'duration': 45,
                'upload_date': '20160405',
                'uploader_id': 'groupon',
                'uploader': 'Groupon',
            },
            'add_ie': ['Youtube'],
        }],
        'params': {
            'skip_download': True,
        },
    }

    # Lower-cased provider name -> (URL template filled with the video id,
    # key of the extractor the resulting URL is handed to).
    _PROVIDERS = {
        'ooyala': ('ooyala:%s', 'Ooyala'),
        'youtube': ('%s', 'Youtube'),
    }

    def _real_extract(self, url):
        """Build a playlist from the videos referenced on a deal page.

        The page's ``payload`` JavaScript assignment is parsed as JSON and
        its ``carousel.dealVideos`` list is walked. Videos without a
        provider or identifier are skipped silently; videos from a provider
        missing in ``_PROVIDERS`` are skipped with a warning.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        payload = self._parse_json(self._search_regex(
            r'(?:var\s+|window\.)payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
        # ``or []`` also covers an explicit JSON null, which a plain
        # ``.get(key, [])`` default would let through as None.
        videos = payload['carousel'].get('dealVideos') or []
        entries = []
        for v in videos:
            provider = v.get('provider')
            video_id = v.get('media') or v.get('id') or v.get('baseURL')
            if not provider or not video_id:
                continue
            # The (None, None) default keeps the unpacking valid for unknown
            # providers so the warning branch below is actually reachable.
            url_pattern, ie_key = self._PROVIDERS.get(
                provider.lower(), (None, None))
            if not url_pattern:
                self.report_warning(
                    '%s: Unsupported video provider %s, skipping video' %
                    (playlist_id, provider))
                continue
            entries.append(self.url_result(url_pattern % video_id, ie_key))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'entries': entries,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }