]>
Commit | Line | Data |
---|---|---|
c6ddbdb6 D |
1 | from __future__ import unicode_literals |
2 | ||
28ebef0b D |
3 | import re |
4 | ||
c6ddbdb6 | 5 | from .common import InfoExtractor |
c16f8a46 YCH |
6 | from ..compat import ( |
7 | compat_str, | |
8 | compat_urlparse, | |
9 | ) | |
a6762c4a S |
10 | from ..utils import ( |
11 | ExtractorError, | |
12 | determine_ext, | |
13 | int_or_none, | |
5c2266df | 14 | sanitized_Request, |
a6762c4a | 15 | ) |
c6ddbdb6 D |
16 | |
17 | ||
class VoiceRepublicIE(InfoExtractor):
    """Extractor for audio talks on voicerepublic.com.

    Accepts both ``/talks/<slug>`` and ``/embed/<slug>`` URLs. The slug is
    used as the display id, and the ``/talks/<slug>`` page is the one that
    gets downloaded regardless of which form was given.
    """

    _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
    _TESTS = [{
        'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
        'md5': 'b9174d651323f17783000876347116e3',
        'info_dict': {
            'id': '2296',
            'display_id': 'watching-the-watchers-building-a-sousveillance-state',
            'ext': 'm4a',
            'title': 'Watching the Watchers: Building a Sousveillance State',
            'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
            'duration': 1800,
            'view_count': int,
        }
    }, {
        'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
        'only_matching': True,
    }]

    @staticmethod
    def _build_formats(base_url, links):
        """Build audio-only format dicts from ``(format_id, talk_url)`` pairs.

        Each ``talk_url`` is resolved against ``base_url``; the extension is
        taken from ``determine_ext(talk_url)``, falling back to ``format_id``
        if that result is falsy.
        """
        return [{
            'url': compat_urlparse.urljoin(base_url, talk_url),
            'format_id': format_id,
            'ext': determine_ext(talk_url) or format_id,
            'vcodec': 'none',
        } for format_id, talk_url in links]

    def _real_extract(self, url):
        """Download the talk page for ``url`` and return its info dict.

        Metadata is read from a JSON object embedded in the page when one can
        be found and parsed; otherwise it is scraped from the HTML markup.

        Raises ExtractorError (marked as expected) when the page reports that
        the audio is still queued for processing.
        """
        display_id = self._match_id(url)

        # Always fetch the /talks/ page, even when given an /embed/ URL.
        req = sanitized_Request(
            compat_urlparse.urljoin(url, '/talks/%s' % display_id))
        # Older versions of Firefox get redirected to an "upgrade browser" page
        req.add_header('User-Agent', 'youtube-dl')
        webpage = self._download_webpage(req, display_id)

        if '>Queued for processing, please stand by...<' in webpage:
            raise ExtractorError(
                'Audio is still queued for processing', expected=True)

        # Preferred source: the object literal following a `return` in the page.
        config = self._search_regex(
            r'(?s)return ({.+?});\s*\n', webpage,
            'data', default=None)
        data = self._parse_json(config, display_id, fatal=False) if config else None
        if data:
            title = data['title']
            description = data.get('teaser')
            talk_id = compat_str(data.get('talk_id') or display_id)
            talk = data['talk']
            duration = int_or_none(talk.get('duration'))
            formats = self._build_formats(url, talk['links'].items())
        else:
            # Fallback: scrape the metadata straight from the HTML.
            title = self._og_search_title(webpage)
            description = self._html_search_regex(
                r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
                webpage, 'description', fatal=False)
            talk_id = self._search_regex(
                [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
                webpage, 'talk id', default=None) or display_id
            duration = None
            # The player tag carries one data-<format_id>='<url>' attribute
            # per available format.
            player = self._search_regex(
                r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
            formats = self._build_formats(
                url, re.findall(r"data-([^=]+)='([^']+)'", player))
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        view_count = int_or_none(self._search_regex(
            r"class='play-count[^']*'>\s*(\d+) plays",
            webpage, 'play count', fatal=False))

        return {
            'id': talk_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }