]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/voicerepublic.py
[ie/box] Fix formats extraction (#8649)
[yt-dlp.git] / yt_dlp / extractor / voicerepublic.py
CommitLineData
c6ddbdb6 1from .common import InfoExtractor
be96f992 2from ..compat import compat_str
a6762c4a
S
3from ..utils import (
4 ExtractorError,
5 determine_ext,
6 int_or_none,
be96f992 7 urljoin,
a6762c4a 8)
c6ddbdb6
D
9
10
class VoiceRepublicIE(InfoExtractor):
    """Extractor for voicerepublic.com talk and embed pages."""

    _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
    _TESTS = [{
        'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
        'md5': 'b9174d651323f17783000876347116e3',
        'info_dict': {
            'id': '2296',
            'display_id': 'watching-the-watchers-building-a-sousveillance-state',
            'ext': 'm4a',
            'title': 'Watching the Watchers: Building a Sousveillance State',
            'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
            'duration': 1556,
            'view_count': int,
        }
    }, {
        'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single talk.

        The page embeds the talk metadata as a JSON object assigned to
        ``initialSnapshot``; everything returned here is read from its
        ``talk`` entry.

        Raises ExtractorError (expected) when the page reports that the
        audio is still queued for processing.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        if '>Queued for processing, please stand by...<' in webpage:
            raise ExtractorError(
                'Audio is still queued for processing', expected=True)

        talk = self._parse_json(self._search_regex(
            r'initialSnapshot\s*=\s*({.+?});',
            webpage, 'talk'), display_id)['talk']
        title = talk['title']

        formats = []
        # A missing/null 'media_links' yields no formats instead of a bare
        # KeyError/AttributeError.
        for format_id, talk_url in (talk.get('media_links') or {}).items():
            format_url = urljoin(url, talk_url)
            if not format_url:
                # urljoin gives nothing usable for a missing or empty link;
                # a format without a URL cannot be downloaded, so skip it.
                continue
            formats.append({
                'url': format_url,
                'format_id': format_id,
                # determine_ext returns its (truthy) default when it cannot
                # guess, which made the format_id fallback unreachable;
                # passing None lets the fallback actually apply.
                'ext': determine_ext(talk_url, default_ext=None) or format_id,
                'vcodec': 'none',
            })

        return {
            # Prefer the site's own id; fall back to the URL slug.
            'id': compat_str(talk.get('id') or display_id),
            'display_id': display_id,
            'title': title,
            'description': talk.get('teaser'),
            'thumbnail': talk.get('image_url'),
            'duration': int_or_none(talk.get('archived_duration')),
            'view_count': int_or_none(talk.get('play_count')),
            'formats': formats,
        }