]>
Commit | Line | Data |
---|---|---|
c6ddbdb6 D |
1 | from __future__ import unicode_literals |
2 | ||
28ebef0b D |
3 | import re |
4 | ||
c6ddbdb6 | 5 | from .common import InfoExtractor |
5c2266df | 6 | from ..compat import compat_urlparse |
a6762c4a S |
7 | from ..utils import ( |
8 | ExtractorError, | |
9 | determine_ext, | |
10 | int_or_none, | |
5c2266df | 11 | sanitized_Request, |
a6762c4a | 12 | ) |
c6ddbdb6 D |
13 | |
14 | ||
class VoiceRepublicIE(InfoExtractor):
    """Extractor for audio talks hosted on voicerepublic.com.

    Both the regular ``/talks/<slug>`` pages and the ``/embed/<slug>``
    player pages are matched; the slug is used as the display id.
    """

    _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
    _TESTS = [{
        'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
        'md5': '0554a24d1657915aa8e8f84e15dc9353',
        'info_dict': {
            'id': '2296',
            'display_id': 'watching-the-watchers-building-a-sousveillance-state',
            'ext': 'm4a',
            'title': 'Watching the Watchers: Building a Sousveillance State',
            'description': 'md5:715ba964958afa2398df615809cfecb1',
            # Raw string: '\.' is not a valid escape in a normal string literal.
            'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
            'duration': 1800,
            'view_count': int,
        }
    }, {
        'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
        'only_matching': True,
    }]

    @staticmethod
    def _build_formats(url, links):
        """Build audio-only format dicts from (format_id, talk_url) pairs.

        ``talk_url`` is resolved against ``url``, so relative links work.
        """
        return [{
            'url': compat_urlparse.urljoin(url, talk_url),
            'format_id': format_id,
            'ext': determine_ext(talk_url) or format_id,
            'vcodec': 'none',
        } for format_id, talk_url in links]

    def _real_extract(self, url):
        """Download the talk page for ``url`` and return its info dict.

        Metadata is taken from the JSON object embedded in the page when it
        can be found and parsed; otherwise it is scraped from the HTML.

        Raises ExtractorError (expected) when the page reports that the
        audio is still queued for processing.
        """
        display_id = self._match_id(url)

        # Always request the canonical /talks/ page, even for /embed/ URLs.
        req = sanitized_Request(
            compat_urlparse.urljoin(url, '/talks/%s' % display_id))
        # Older versions of Firefox get redirected to an "upgrade browser" page
        req.add_header('User-Agent', 'youtube-dl')
        webpage = self._download_webpage(req, display_id)

        if '>Queued for processing, please stand by...<' in webpage:
            raise ExtractorError(
                'Audio is still queued for processing', expected=True)

        # Preferred source: the JSON object returned by an inline script.
        config = self._search_regex(
            r'(?s)return ({.+?});\s*\n', webpage,
            'data', default=None)
        data = self._parse_json(config, display_id, fatal=False) if config else None
        if data:
            title = data['title']
            description = data.get('teaser')
            talk_id = data.get('talk_id') or display_id
            talk = data['talk']
            duration = int_or_none(talk.get('duration'))
            formats = self._build_formats(url, talk['links'].items())
        else:
            # Fallback: scrape the same fields from the page markup.
            title = self._og_search_title(webpage)
            description = self._html_search_regex(
                r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
                webpage, 'description', fatal=False)
            talk_id = self._search_regex(
                [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
                webpage, 'talk id', default=None) or display_id
            # The HTML fallback does not extract a duration.
            duration = None
            # The player tag's data-<format>='<url>' attributes list the media.
            player = self._search_regex(
                r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
            formats = self._build_formats(
                url, re.findall(r"data-([^=]+)='([^']+)'", player))
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        view_count = int_or_none(self._search_regex(
            r"class='play-count[^']*'>\s*(\d+) plays",
            webpage, 'play count', fatal=False))

        return {
            'id': talk_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }