]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rokfin.py
2 from datetime
import datetime
4 from .common
import InfoExtractor
18 _API_BASE_URL
= 'https://prod-api-v2.production.rokfin.com/api/v2/public/'
21 class RokfinIE(InfoExtractor
):
22 _VALID_URL
= r
'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)'
24 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change',
28 'title': 'Mitt Romney\'s Crazy Solution To Climate Change',
29 'thumbnail': r
're:https://img\.production\.rokfin\.com/.+',
30 'upload_date': '20211023',
31 'timestamp': 1634998029,
32 'channel': 'Jimmy Dore',
34 'channel_url': 'https://rokfin.com/TheJimmyDoreShow',
36 'availability': 'public',
37 'live_status': 'not_live',
42 'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time',
46 'title': 'Julian Assange Arrested: Streaming In Real Time',
47 'thumbnail': r
're:https://img\.production\.rokfin\.com/.+',
48 'upload_date': '20190412',
49 'timestamp': 1555052644,
50 'channel': 'Ron Placone',
52 'channel_url': 'https://rokfin.com/RonPlacone',
53 'availability': 'public',
54 'live_status': 'not_live',
57 'tags': ['FreeThinkingMedia^', 'RealProgressives^'],
60 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data',
64 'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial Data',
65 'thumbnail': r
're:https://img\.production\.rokfin\.com/.+',
66 'description': 'md5:324ce2d3e3b62e659506409e458b9d8e',
67 'channel': 'Ryan Cristián',
69 'channel_url': 'https://rokfin.com/TLAVagabond',
70 'availability': 'public',
73 'live_status': 'was_live',
74 'timestamp': 1635874720,
75 'release_timestamp': 1635874720,
76 'release_date': '20211102',
77 'upload_date': '20211102',
80 'tags': ['FreeThinkingMedia^'],
84 def _real_extract(self
, url
):
85 video_id
, video_type
= self
._match
_valid
_url
(url
).group('id', 'type')
87 metadata
= self
._download
_json
(f
'{_API_BASE_URL}{video_id}', video_id
)
89 scheduled
= unified_timestamp(metadata
.get('scheduledAt'))
90 live_status
= ('was_live' if metadata
.get('stoppedAt')
91 else 'is_upcoming' if scheduled
92 else 'is_live' if video_type
== 'stream'
95 video_url
= traverse_obj(metadata
, 'url', ('content', 'contentUrl'), expected_type
=url_or_none
)
96 formats
, subtitles
= [{'url': video_url}
] if video_url
else [], {}
97 if determine_ext(video_url
) == 'm3u8':
98 formats
, subtitles
= self
._extract
_m
3u8_formats
_and
_subtitles
(
99 video_url
, video_id
, fatal
=False, live
=live_status
== 'is_live')
102 if traverse_obj(metadata
, 'premiumPlan', 'premium'):
103 self
.raise_login_required('This video is only available to premium users', True, method
='cookies')
105 self
.raise_no_formats(
106 f
'Stream is offline; sheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
107 video_id
=video_id
, expected
=True)
108 self
._sort
_formats
(formats
)
110 uploader
= traverse_obj(metadata
, ('createdBy', 'username'), ('creator', 'username'))
111 timestamp
= (scheduled
or float_or_none(metadata
.get('postedAtMilli'), 1000)
112 or unified_timestamp(metadata
.get('creationDateTime')))
116 'subtitles': subtitles
,
117 'title': str_or_none(traverse_obj(metadata
, 'title', ('content', 'contentTitle'))),
118 'duration': float_or_none(traverse_obj(metadata
, ('content', 'duration'))),
119 'thumbnail': url_or_none(traverse_obj(metadata
, 'thumbnail', ('content', 'thumbnailUrl1'))),
120 'description': str_or_none(traverse_obj(metadata
, 'description', ('content', 'contentDescription'))),
121 'like_count': int_or_none(metadata
.get('likeCount')),
122 'dislike_count': int_or_none(metadata
.get('dislikeCount')),
123 'channel': str_or_none(traverse_obj(metadata
, ('createdBy', 'name'), ('creator', 'name'))),
124 'channel_id': traverse_obj(metadata
, ('createdBy', 'id'), ('creator', 'id')),
125 'channel_url': url_or_none(f
'https://rokfin.com/{uploader}') if uploader
else None,
126 'timestamp': timestamp
,
127 'release_timestamp': timestamp
if live_status
!= 'not_live' else None,
128 'tags': traverse_obj(metadata
, ('tags', ..., 'title'), expected_type
=str_or_none
),
129 'live_status': live_status
,
130 'availability': self
._availability
(
131 needs_premium
=bool(traverse_obj(metadata
, 'premiumPlan', 'premium')),
132 is_private
=False, needs_subscription
=False, needs_auth
=False, is_unlisted
=False),
133 # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong
134 '__post_extractor': self
.extract_comments(video_id
) if video_type
== 'post' else None,
137 def _get_comments(self
, video_id
):
139 for page_n
in itertools
.count():
140 raw_comments
= self
._download
_json
(
141 f
'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
142 video_id
, note
=f
'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}',
145 for comment
in raw_comments
.get('content') or []:
147 'text': str_or_none(comment
.get('comment')),
148 'author': str_or_none(comment
.get('name')),
149 'id': comment
.get('commentId'),
150 'author_id': comment
.get('userId'),
152 'like_count': int_or_none(comment
.get('numLikes')),
153 'dislike_count': int_or_none(comment
.get('numDislikes')),
154 'timestamp': unified_timestamp(comment
.get('postedAt'))
157 pages_total
= int_or_none(raw_comments
.get('totalPages')) or None
158 is_last
= raw_comments
.get('last')
159 if not raw_comments
.get('content') or is_last
or (page_n
> pages_total
if pages_total
else is_last
is not False):
163 class RokfinPlaylistBaseIE(InfoExtractor
):
168 'dead_stream': 'stream',
172 def _get_video_data(self
, metadata
):
173 for content
in metadata
.get('content') or []:
174 media_type
= self
._TYPES
.get(content
.get('mediaType'))
175 video_id
= content
.get('id') if media_type
== 'post' else content
.get('mediaId')
176 if not media_type
or not video_id
:
179 yield self
.url_result(f
'https://rokfin.com/{media_type}/{video_id}', video_id
=f
'{media_type}/{video_id}',
180 video_title
=str_or_none(traverse_obj(content
, ('content', 'contentTitle'))))
183 class RokfinStackIE(RokfinPlaylistBaseIE
):
184 IE_NAME
= 'rokfin:stack'
185 _VALID_URL
= r
'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)'
187 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020',
def _real_extract(self, url):
    """Return a playlist result for the stack referenced by *url*.

    The stack id is taken from the URL with ``_match_id``. The stack's JSON
    is downloaded from ``{_API_BASE_URL}stack/<id>`` and passed to
    ``_get_video_data``, whose yielded entries become the playlist contents.
    The stack id doubles as the playlist id.
    """
    stack_id = self._match_id(url)
    stack_json = self._download_json(f'{_API_BASE_URL}stack/{stack_id}', stack_id)
    return self.playlist_result(self._get_video_data(stack_json), stack_id)
200 class RokfinChannelIE(RokfinPlaylistBaseIE
):
201 IE_NAME
= 'rokfin:channel'
202 _VALID_URL
= r
'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$'
204 'url': 'https://rokfin.com/TheConvoCouch',
205 'playlist_mincount': 100,
208 'title': 'TheConvoCouch - New',
209 'description': 'md5:bb622b1bca100209b91cd685f7847f06',
def _real_initialize(self):
    """Validate the extractor arguments as the initialization step."""
    # All checking lives in the validator, which raises on a bad "tab" value.
    self._validate_extractor_args()
def _validate_extractor_args(self):
    """Reject an unusable ``tab`` extractor argument.

    Nothing is checked when the argument was not supplied (the lookup
    returns the ``None`` default). Otherwise exactly one value is accepted,
    and it must be a key of ``self._TABS``.

    Raises:
        ExtractorError: (expected) if more than one tab was requested or the
            requested tab is not a key of ``self._TABS``.
    """
    tabs = self._configuration_arg('tab', None)
    if tabs is None:
        return
    if len(tabs) > 1 or tabs[0] not in self._TABS:
        raise ExtractorError(
            f'Invalid extractor-arg "tab". Must be one of {", ".join(self._TABS)}', expected=True)
230 def _entries(self
, channel_id
, channel_name
, tab
):
232 for page_n
in itertools
.count(0):
233 if tab
in ('posts', 'top'):
234 data_url
= f
'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50'
236 data_url
= f
'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
237 metadata
= self
._download
_json
(
238 data_url
, channel_name
,
239 note
=f
'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}')
241 yield from self
._get
_video
_data
(metadata
)
242 pages_total
= int_or_none(metadata
.get('totalPages')) or None
243 is_last
= metadata
.get('last')
244 if is_last
or (page_n
> pages_total
if pages_total
else is_last
is not False):
def _real_extract(self, url):
    """Return a playlist result for one tab of the channel named in *url*.

    The channel name comes from the URL. Its numeric/opaque ``id`` and
    optional ``description`` are read from ``{_API_BASE_URL}user/<name>``.
    The tab is the first value of the ``tab`` extractor argument and
    defaults to ``'new'``; it is mapped through ``self._TABS`` before being
    handed to ``_entries``.

    Raises:
        KeyError: if the channel JSON has no ``id`` field, or if the tab is
            not a key of ``self._TABS``.
    """
    channel_name = self._match_id(url)
    info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name)
    channel_id = info['id']
    tab = self._configuration_arg('tab', default=['new'])[0]

    # _entries is a generator, so no listing pages are fetched until the
    # playlist is actually iterated.
    entries = self._entries(channel_id, channel_name, self._TABS[tab])
    playlist_id = f'{channel_id}-{tab}'
    playlist_title = f'{channel_name} - {tab.title()}'
    description = str_or_none(info.get('description'))
    return self.playlist_result(entries, playlist_id, playlist_title, description)