6 from .common
import InfoExtractor
21 class WeiboBaseIE(InfoExtractor
):
# Run Weibo's two-step "visitor" handshake for the current session.
#   1. Request https://passport.weibo.com/visitor/genvisitor with a form-encoded
#      payload that carries a JSON browser fingerprint. The Chrome major version
#      is taken from the configured User-Agent header, falling back to '90'.
#      The response is passed through strip_jsonp and its 'data' member is kept.
#   2. Request https://passport.weibo.com/visitor/visitor with a query built from
#      that data: 't' is the returned tid, 'w' is 3 when 'new_tid' is truthy and
#      2 otherwise, 'c' is the confidence zero-padded to three digits (100 when
#      absent), and '_rand' is a random float.
# Both requests send `visitor_url` as the Referer. No value is returned in the
# visible code; per the note strings, the second request is what yields the
# guest cookies.
# NOTE(review): several lines of both payload dicts are not visible in this view,
# so the full set of submitted fields cannot be documented from here.
22 def _update_visitor_cookies(self
, visitor_url
, video_id
):
23 headers
= {'Referer': visitor_url}
24 chrome_ver
= self
._search
_regex
(
25 r
'Chrome/(\d+)', self
.get_param('http_headers')['User-Agent'], 'user agent version', default
='90')
26 visitor_data
= self
._download
_json
(
27 'https://passport.weibo.com/visitor/genvisitor', video_id
,
28 note
='Generating first-visit guest request',
29 headers
=headers
, transform_source
=strip_jsonp
,
30 data
=urlencode_postdata({
34 'browser': f
'Chrome{chrome_ver},0,0,0',
36 'screenInfo': '1920*1080*24',
38 }, separators
=(',', ':'))}))['data']
40 self
._download
_webpage
(
41 'https://passport.weibo.com/visitor/visitor', video_id
,
42 note
='Running first-visit callback to get guest cookies',
43 headers
=headers
, query
={
45 't': visitor_data
['tid'],
46 'w': 3 if visitor_data
.get('new_tid') else 2,
47 'c': f
'{visitor_data.get("confidence", 100):03d}',
51 '_rand': random
.random(),
54 def _weibo_download_json(self
, url
, video_id
, *args
, fatal
=True, note
='Downloading JSON metadata', **kwargs
):
55 webpage
, urlh
= self
._download
_webpage
_handle
(url
, video_id
, *args
, fatal
=fatal
, note
=note
, **kwargs
)
56 if urllib
.parse
.urlparse(urlh
.url
).netloc
== 'passport.weibo.com':
57 self
._update
_visitor
_cookies
(urlh
.url
, video_id
)
58 webpage
= self
._download
_webpage
(url
, video_id
, *args
, fatal
=fatal
, note
=note
, **kwargs
)
59 return self
._parse
_json
(webpage
, video_id
, fatal
=fatal
)
# Build the list of format dicts for one status from video_info['page_info']['media_info'].
#
# Primary path: every 'playback_list' entry whose play_info.url passes url_or_none
# contributes its 'play_info' dict, remapped to format fields (quality_desc -> format,
# label -> format_id, mime -> ext via mimetype2ext, size -> filesize, ...). A bitrate
# of 0 is turned into None by the `x or None` step.
#
# Fallback (only when the primary path produced nothing): iterate the distinct
# URL-valued members of media_info, keep those containing 'label=', read the
# format id and WxH resolution from the 'label=...&template=...' part of the URL
# (both None when the pattern does not match), and merge size/bitrate from the
# 'video_details' entry whose label starts with that format id.
#
# NOTE(review): the primary path stores the byte size under 'filesize' while the
# fallback stores it under 'size' - confirm which key downstream code expects.
# NOTE(review): some lines of this method are not visible in this view (the start
# of the fallback dict and everything after the 'tbr' entry, including the
# return), so the exact shape of the fallback entries cannot be confirmed here.
61 def _extract_formats(self
, video_info
):
62 media_info
= traverse_obj(video_info
, ('page_info', 'media_info'))
63 formats
= traverse_obj(media_info
, (
64 'playback_list', lambda _
, v
: url_or_none(v
['play_info']['url']), 'play_info', {
66 'format': ('quality_desc', {str}
),
67 'format_id': ('label', {str}
),
68 'ext': ('mime', {mimetype2ext}
),
69 'tbr': ('bitrate', {int_or_none}
, {lambda x: x or None}
),
70 'vcodec': ('video_codecs', {str}
),
71 'fps': ('fps', {int_or_none}
),
72 'width': ('width', {int_or_none}
),
73 'height': ('height', {int_or_none}
),
74 'filesize': ('size', {int_or_none}
),
75 'acodec': ('audio_codecs', {str}
),
76 'asr': ('audio_sample_rate', {int_or_none}
),
77 'audio_channels': ('audio_channels', {int_or_none}
),
79 if not formats
: # fallback, should be barely used
80 for url
in set(traverse_obj(media_info
, (..., {url_or_none}
))):
81 if 'label=' in url
: # filter out non-video urls
82 format_id
, resolution
= self
._search
_regex
(
83 r
'label=(\w+)&template=(\d+x\d+)', url
, 'format info',
84 group
=(1, 2), default
=(None, None))
87 'format_id': format_id
,
88 **parse_resolution(resolution
),
89 **traverse_obj(media_info
, (
90 'video_details', lambda _
, v
: v
['label'].startswith(format_id
), {
91 'size': ('size', {int_or_none}
),
92 'tbr': ('bitrate', {int_or_none}
),
# Convert one Weibo status object (`video_info`) into an info dict.
#
# Fixed entries: the extractor key and name are always taken from WeiboIE,
# 'formats' comes from _extract_formats, a weibo.com Referer header is attached,
# and a legacy 'WeiboMobile' archive id is derived from `video_id`.
# Mapped entries: id (first of id / id_str / mid), display_id (mblogid), title
# (first non-empty of video_title / kol_title / name under page_info.media_info),
# description (text_raw), duration, timestamp, thumbnail, uploader fields
# (profile_url is joined onto https://weibo.com/), and view/like/repost counts.
# 'tags' collects topic_struct[*].topic_title and is None when that list is empty.
#
# NOTE(review): `video_id` defaults to None and is interpolated into the legacy
# archive id; the callers visible in this file invoke this method without
# `video_id` - verify that an archive id built from None is intended.
# NOTE(review): the lines that open and close the returned dict are not visible
# in this view; any entries on those lines are undocumented here.
98 def _parse_video_info(self
, video_info
, video_id
=None):
101 'extractor_key': WeiboIE
.ie_key(),
102 'extractor': WeiboIE
.IE_NAME
,
103 'formats': self
._extract
_formats
(video_info
),
104 'http_headers': {'Referer': 'https://weibo.com/'}
,
105 '_old_archive_ids': [make_archive_id('WeiboMobile', video_id
)],
106 **traverse_obj(video_info
, {
107 'id': (('id', 'id_str', 'mid'), {str_or_none}
),
108 'display_id': ('mblogid', {str_or_none}
),
109 'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}
, {lambda x: x or None}
),
110 'description': ('text_raw', {str}
),
111 'duration': ('page_info', 'media_info', 'duration', {int_or_none}
),
112 'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}
),
113 'thumbnail': ('page_info', 'page_pic', {url_or_none}
),
114 'uploader': ('user', 'screen_name', {str}
),
115 'uploader_id': ('user', ('id', 'id_str'), {str_or_none}
),
116 'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}
),
117 'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}
),
118 'like_count': ('attitudes_count', {int_or_none}
),
119 'repost_count': ('reposts_count', {int_or_none}
),
121 'tags': traverse_obj(video_info
, ('topic_struct', ..., 'topic_title', {str}
)) or None,
125 class WeiboIE(WeiboBaseIE
):
126 _VALID_URL
= r
'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
128 'url': 'https://weibo.com/7827771738/N4xlMvjhI',
130 'id': '4910815147462302',
132 'display_id': 'N4xlMvjhI',
133 'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
134 'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
136 'timestamp': 1686312819,
137 'upload_date': '20230609',
138 'thumbnail': r
're:https://.*\.jpg',
139 'uploader': '睡前视频基地',
140 'uploader_id': '7827771738',
141 'uploader_url': 'https://weibo.com/u/7827771738',
145 'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'],
148 'url': 'https://m.weibo.cn/status/4189191225395228',
150 'id': '4189191225395228',
152 'display_id': 'FBqgOmDxO',
153 'title': '柴犬柴犬的秒拍视频',
154 'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
156 'timestamp': 1514264429,
157 'upload_date': '20171226',
158 'thumbnail': r
're:https://.*\.jpg',
160 'uploader_id': '5926682210',
161 'uploader_url': 'https://weibo.com/u/5926682210',
167 'url': 'https://weibo.com/0/4224132150961381',
168 'note': 'no playback_list example',
169 'only_matching': True,
172 def _real_extract(self
, url
):
173 video_id
= self
._match
_id
(url
)
175 return self
._parse
_video
_info
(self
._weibo
_download
_json
(
176 f
'https://weibo.com/ajax/statuses/show?id={video_id}', video_id
))
179 class WeiboVideoIE(WeiboBaseIE
):
180 _VALID_URL
= r
'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)'
182 'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
184 'id': '4797700463137878',
186 'display_id': 'LEZDodaiW',
187 'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了',
188 'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM ',
190 'timestamp': 1659344278,
191 'upload_date': '20220801',
192 'thumbnail': r
're:https://.*\.jpg',
193 'uploader': '君子爱财陈平安',
194 'uploader_id': '3905382233',
195 'uploader_url': 'https://weibo.com/u/3905382233',
def _real_extract(self, url):
    """Resolve a `weibo.com/tv/show/<oid>` URL to the status that hosts the video.

    The page's `Component_Play_Playinfo` component is requested from the tv
    component API with the object id, and the `mid` it reports is used to
    delegate to WeiboIE through a `https://weibo.com/0/<mid>` URL.
    """
    video_id = self._match_id(url)

    # The API expects the component request as a form field holding JSON.
    form_body = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode()
    # The id has the form "<digits>:<digits>"; the colon must be percent-encoded
    # inside the `page` query value.
    escaped_id = video_id.replace(':', '%3A')
    response = self._weibo_download_json(
        f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{escaped_id}',
        video_id, headers={'Referer': url}, data=form_body)
    play_info = response['data']['Component_Play_Playinfo']

    status_url = f'https://weibo.com/0/{play_info["mid"]}'
    return self.url_result(status_url, WeiboIE)
212 class WeiboUserIE(WeiboBaseIE
):
213 _VALID_URL
= r
'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)'
215 'url': 'https://weibo.com/u/2066652961?tabtype=video',
219 'description': '萧影殿下的全部视频',
222 'playlist_mincount': 195,
225 def _fetch_page(self
, uid
, cursor
=0, page
=1):
226 return self
._weibo
_download
_json
(
227 'https://weibo.com/ajax/profile/getWaterFallContent',
228 uid
, note
=f
'Downloading videos page {page}',
229 query
={'uid': uid, 'cursor': cursor}
)['data']
# Generator yielding one parsed info dict per video of user `uid`, page by page.
# Page 1 reuses the already-downloaded `first_page`; later pages are fetched with
# _fetch_page using the 'next_cursor' value reported by the previous page.
# Only dict items of the page's 'list' are parsed, each via _parse_video_info
# (called without a video_id).
# The final check treats a missing, non-numeric, zero or negative cursor alike:
# `int_or_none(cursor) or -1` collapses all of them to a value below zero.
# NOTE(review): the body of that final `if` and any initialisation of `cursor`
# before the loop are not visible in this view - presumably the loop stops
# there; confirm against the full file.
231 def _entries(self
, uid
, first_page
):
233 for page
in itertools
.count(1):
234 response
= first_page
if page
== 1 else self
._fetch
_page
(uid
, cursor
, page
)
235 for video_info
in traverse_obj(response
, ('list', ..., {dict}
)):
236 yield self
._parse
_video
_info
(video_info
)
237 cursor
= response
.get('next_cursor')
238 if (int_or_none(cursor
) or -1) < 0:
# Build the playlist for a user's video listing.
# The user id from the URL is used to fetch the first page; the uploader name is
# the first 'screen_name' string found among that page's list items. When a name
# is found, the playlist gets a title, description and uploader derived from it;
# otherwise no extra metadata is passed. Entries come from the _entries
# generator, which reuses the already-downloaded first page.
# NOTE(review): the line that opens the metadata dict (the assignment consumed
# as `**metainfo` below) is not visible in this view.
241 def _real_extract(self
, url
):
242 uid
= self
._match
_id
(url
)
243 first_page
= self
._fetch
_page
(uid
)
244 uploader
= traverse_obj(first_page
, ('list', ..., 'user', 'screen_name', {str}
), get_all
=False)
246 'title': f
'{uploader}的视频',
247 'description': f
'{uploader}的全部视频',
248 'uploader': uploader
,
249 } if uploader
else {}
251 return self
.playlist_result(self
._entries
(uid
, first_page
), uid
, **metainfo
)