]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/weibo.py
1 from .common
import InfoExtractor
18 class WeiboIE(InfoExtractor
):
19 _VALID_URL
= r
'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
21 'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
25 'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
29 def _real_extract(self
, url
):
30 video_id
= self
._match
_id
(url
)
31 # to get Referer url for genvisitor
32 webpage
, urlh
= self
._download
_webpage
_handle
(url
, video_id
)
34 visitor_url
= urlh
.geturl()
36 if 'passport.weibo.com' in visitor_url
:
38 visitor_data
= self
._download
_json
(
39 'https://passport.weibo.com/visitor/genvisitor', video_id
,
40 note
='Generating first-visit data',
41 transform_source
=strip_jsonp
,
42 headers
={'Referer': visitor_url}
,
43 data
=urlencode_postdata({
47 'browser': 'Gecko57,0,0,0',
49 'screenInfo': '1440*900*24',
54 tid
= visitor_data
['data']['tid']
55 cnfd
= '%03d' % visitor_data
['data']['confidence']
57 self
._download
_webpage
(
58 'https://passport.weibo.com/visitor/visitor', video_id
,
59 note
='Running first-visit callback',
67 '_rand': random
.random(),
70 webpage
= self
._download
_webpage
(
71 url
, video_id
, note
='Revisiting webpage')
73 title
= self
._html
_extract
_title
(webpage
)
75 video_formats
= compat_parse_qs(self
._search
_regex
(
76 r
'video-sources=\\\"(.+?)\"', webpage
, 'video_sources'))
79 supported_resolutions
= (480, 720)
80 for res
in supported_resolutions
:
81 vid_urls
= video_formats
.get(compat_str(res
))
82 if not vid_urls
or not isinstance(vid_urls
, list):
91 self
._sort
_formats
(formats
)
93 uploader
= self
._og
_search
_property
(
94 'nick-name', webpage
, 'uploader', default
=None)
104 class WeiboMobileIE(InfoExtractor
):
105 _VALID_URL
= r
'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
107 'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
109 'id': '4189191225395228',
111 'title': '午睡当然是要甜甜蜜蜜的啦',
116 def _real_extract(self
, url
):
117 video_id
= self
._match
_id
(url
)
118 # to get Referer url for genvisitor
119 webpage
= self
._download
_webpage
(url
, video_id
, note
='visit the page')
121 weibo_info
= self
._parse
_json
(self
._search
_regex
(
122 r
'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
123 webpage
, 'js_code', flags
=re
.DOTALL
),
124 video_id
, transform_source
=js_to_json
)
126 status_data
= weibo_info
.get('status', {})
127 page_info
= status_data
.get('page_info')
128 title
= status_data
['status_title']
129 uploader
= status_data
.get('user', {}).get('screen_name')
134 'uploader': uploader
,
135 'url': page_info
['media_info']['stream_url']