]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/threespeak.py
3 from .common
import InfoExtractor
10 class ThreeSpeakIE(InfoExtractor
):
11 _VALID_URL
= r
'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
14 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy',
18 'title': 'Can People who took the Vax think Critically',
19 'uploader': 'dannyshine',
20 'description': 'md5:181aa7ccb304afafa089b5af3bca7a10',
21 'tags': ['sex', 'covid', 'antinatalism', 'comedy', 'vaccines'],
22 'thumbnail': 'https://img.3speakcontent.co/wjgoxyfy/thumbnails/default.png',
23 'upload_date': '20211021',
24 'duration': 2703.867833,
25 'filesize': 1620054781,
27 'params': {'skip_download': True}
30 def _real_extract(self
, url
):
31 id = self
._match
_id
(url
)
32 webpage
= self
._download
_webpage
(url
, id)
33 json_str
= self
._html
_search
_regex
(r
'JSON\.parse\(\'([^
\']+)\'\
)', webpage, 'json
')
34 # The json string itself is escaped. Hence the double parsing
35 data_json = self._parse_json(self._parse_json(f'"{json_str}"', id), id)
36 video_json = self._parse_json(data_json['json_metadata
'], id)
37 formats, subtitles = [], {}
38 og_m3u8 = self._html_search_regex(r'<meta\s?
property=\"ogvideo
\"\s?content
=\"([^
\"]+)\">', webpage, 'og m3u8
', fatal=False)
40 https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, id, fatal=False, m3u8_id='https
')
41 formats.extend(https_frmts)
42 subtitles = self._merge_subtitles(subtitles, https_subs)
43 ipfs_m3u8 = try_get(video_json, lambda x: x['video
']['info
']['ipfs
'])
45 ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles(f'https
://ipfs
.3speak
.tv
/ipfs
/{ipfs_m3u8}
',
46 id, fatal=False, m3u8_id='ipfs
')
47 formats.extend(ipfs_frmts)
48 subtitles = self._merge_subtitles(subtitles, ipfs_subs)
49 mp4_file = try_get(video_json, lambda x: x['video
']['info
']['file'])
52 'url
': f'https
://threespeakvideo
.b
-cdn
.net
/{id}
/{mp4_file}
',
54 'format_id
': 'https
-mp4
',
55 'duration
': try_get(video_json, lambda x: x['video
']['info
']['duration
']),
56 'filesize
': try_get(video_json, lambda x: x['video
']['info
']['filesize
']),
58 'format_note
': 'Original
file',
62 'title
': data_json.get('title
') or data_json.get('root_title
'),
63 'uploader
': data_json.get('author
'),
64 'description
': try_get(video_json, lambda x: x['video
']['content
']['description
']),
65 'tags
': try_get(video_json, lambda x: x['video
']['content
']['tags
']),
66 'thumbnail
': try_get(video_json, lambda x: x['image
'][0]),
67 'upload_date
': unified_strdate(data_json.get('created
')),
69 'subtitles
': subtitles,
73 class ThreeSpeakUserIE(InfoExtractor):
74 _VALID_URL = r'https?
://(?
:www\
.)?
3speak\
.tv
/user
/(?P
<id>[^
/$
&?
#]+)'
77 'url': 'https://3speak.tv/user/theycallmedan',
79 'id': 'theycallmedan',
81 'playlist_mincount': 115,
84 def _real_extract(self
, url
):
85 id = self
._match
_id
(url
)
86 webpage
= self
._download
_webpage
(url
, id)
89 'https://3speak.tv/watch?v=%s' % video
,
90 ie
=ThreeSpeakIE
.ie_key())
91 for video
in re
.findall(r
'data-payout\s?\=\s?\"([^\"]+)\"', webpage
) if video
93 return self
.playlist_result(entries
, id)