]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/threespeak.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
13 class ThreeSpeakIE(InfoExtractor
):
14 _VALID_URL
= r
'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
17 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy',
21 'title': 'Can People who took the Vax think Critically',
22 'uploader': 'dannyshine',
23 'description': 'md5:181aa7ccb304afafa089b5af3bca7a10',
24 'tags': ['sex', 'covid', 'antinatalism', 'comedy', 'vaccines'],
25 'thumbnail': 'https://img.3speakcontent.co/wjgoxyfy/thumbnails/default.png',
26 'upload_date': '20211021',
27 'duration': 2703.867833,
28 'filesize': 1620054781,
30 'params': {'skip_download': True}
33 def _real_extract(self
, url
):
34 id = self
._match
_id
(url
)
35 webpage
= self
._download
_webpage
(url
, id)
36 json_str
= self
._html
_search
_regex
(r
'JSON\.parse\(\'([^
\']+)\'\
)', webpage, 'json
')
37 # The json string itself is escaped. Hence the double parsing
38 data_json = self._parse_json(self._parse_json(f'"{json_str}"', id), id)
39 video_json = self._parse_json(data_json['json_metadata
'], id)
40 formats, subtitles = [], {}
41 og_m3u8 = self._html_search_regex(r'<meta\s?
property=\"ogvideo
\"\s?content
=\"([^
\"]+)\">', webpage, 'og m3u8
', fatal=False)
43 https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, id, fatal=False, m3u8_id='https
')
44 formats.extend(https_frmts)
45 subtitles = self._merge_subtitles(subtitles, https_subs)
46 ipfs_m3u8 = try_get(video_json, lambda x: x['video
']['info
']['ipfs
'])
48 ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles(f'https
://ipfs
.3speak
.tv
/ipfs
/{ipfs_m3u8}
',
49 id, fatal=False, m3u8_id='ipfs
')
50 formats.extend(ipfs_frmts)
51 subtitles = self._merge_subtitles(subtitles, ipfs_subs)
52 mp4_file = try_get(video_json, lambda x: x['video
']['info
']['file'])
55 'url
': f'https
://threespeakvideo
.b
-cdn
.net
/{id}
/{mp4_file}
',
57 'format_id
': 'https
-mp4
',
58 'duration
': try_get(video_json, lambda x: x['video
']['info
']['duration
']),
59 'filesize
': try_get(video_json, lambda x: x['video
']['info
']['filesize
']),
61 'format_note
': 'Original
file',
63 self._sort_formats(formats)
66 'title
': data_json.get('title
') or data_json.get('root_title
'),
67 'uploader
': data_json.get('author
'),
68 'description
': try_get(video_json, lambda x: x['video
']['content
']['description
']),
69 'tags
': try_get(video_json, lambda x: x['video
']['content
']['tags
']),
70 'thumbnail
': try_get(video_json, lambda x: x['image
'][0]),
71 'upload_date
': unified_strdate(data_json.get('created
')),
73 'subtitles
': subtitles,
77 class ThreeSpeakUserIE(InfoExtractor):
78 _VALID_URL = r'(?
:https?
://)(?
:www\
.)?
3speak\
.tv
/user
/(?P
<id>[^
/$
&?
#]+)'
81 'url': 'https://3speak.tv/user/theycallmedan',
83 'id': 'theycallmedan',
85 'playlist_mincount': 115,
88 def _real_extract(self
, url
):
89 id = self
._match
_id
(url
)
90 webpage
= self
._download
_webpage
(url
, id)
93 'https://3speak.tv/watch?v=%s' % video
,
94 ie
=ThreeSpeakIE
.ie_key())
95 for video
in re
.findall(r
'data-payout\s?\=\s?\"([^\"]+)\"', webpage
) if video
97 return self
.playlist_result(entries
, id)