10 compat_urllib_parse_unquote
,
14 class DouyinIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
17 'url': 'https://www.douyin.com/video/6961737553342991651',
18 'md5': '10523312c8b8100f353620ac9dc8f067',
20 'id': '6961737553342991651',
22 'title': '#杨超越 小小水手带你去远航❤️',
24 'upload_date': '20210513',
25 'timestamp': 1620905839,
26 'uploader_id': '110403406559',
33 'url': 'https://www.douyin.com/video/6982497745948921092',
34 'md5': 'd78408c984b9b5102904cf6b6bc2d712',
36 'id': '6982497745948921092',
38 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
40 'upload_date': '20210708',
41 'timestamp': 1625739481,
42 'uploader_id': '408654318141572',
49 'url': 'https://www.douyin.com/video/6953975910773099811',
50 'md5': '72e882e24f75064c218b76c8b713c185',
52 'id': '6953975910773099811',
54 'title': '#一起看海 出现在你的夏日里',
56 'upload_date': '20210422',
57 'timestamp': 1619098692,
58 'uploader_id': '110403406559',
65 'url': 'https://www.douyin.com/video/6950251282489675042',
66 'md5': 'b4db86aec367ef810ddd38b1737d2fed',
68 'id': '6950251282489675042',
70 'title': '哈哈哈,成功了哈哈哈哈哈哈',
72 'upload_date': '20210412',
73 'timestamp': 1618231483,
74 'uploader_id': '110403406559',
81 'url': 'https://www.douyin.com/video/6963263655114722595',
82 'md5': '1abe1c477d05ee62efb40bf2329957cf',
84 'id': '6963263655114722595',
86 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
88 'upload_date': '20210517',
89 'timestamp': 1621261163,
90 'uploader_id': '110403406559',
98 def _real_extract(self
, url
):
99 video_id
= self
._match
_id
(url
)
100 webpage
= self
._download
_webpage
(url
, video_id
)
101 render_data
= self
._parse
_json
(
103 r
'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^
>]*>(%7B
.+%7D
)</script
>',
104 webpage, 'render data
'),
105 video_id, transform_source=compat_urllib_parse_unquote)
106 details = traverse_obj(render_data, (..., 'aweme
', 'detail
'), get_all=False)
108 thumbnails = [{'url': self._proto_relative_url(url)} for url in traverse_obj(
109 details, ('video
', ('cover
', 'dynamicCover
', 'originCover
')), expected_type=url_or_none, default=[])]
112 'width
': traverse_obj(details, ('video
', 'width
'), expected_type=int),
113 'height
': traverse_obj(details, ('video
', 'height
'), expected_type=int),
116 formats = [{**common, 'url': self._proto_relative_url(url)} for url in traverse_obj(
117 details, ('video
', 'playAddr
', ..., 'src
'), expected_type=url_or_none, default=[]) if url]
118 self._remove_duplicate_formats(formats)
120 download_url = traverse_obj(details, ('download
', 'url
'), expected_type=url_or_none)
124 'format_id
': 'download
',
125 'url
': self._proto_relative_url(download_url),
128 self._sort_formats(formats)
132 'title
': details.get('desc
') or self._html_search_meta('title
', webpage),
134 'thumbnails
': thumbnails,
135 'uploader
': traverse_obj(details, ('authorInfo
', 'nickname
'), expected_type=str),
136 'uploader_id
': traverse_obj(details, ('authorInfo
', 'uid
'), expected_type=str),
137 'uploader_url
': 'https
://www
.douyin
.com
/user
/%s' % traverse_obj(
138 details, ('authorInfo
', 'secUid
'), expected_type=str),
139 'timestamp
': int_or_none(details.get('createTime
')),
140 'duration
': traverse_obj(details, ('video
', 'duration
'), expected_type=int),
141 'view_count
': traverse_obj(details, ('stats
', 'playCount
'), expected_type=int),
142 'like_count
': traverse_obj(details, ('stats
', 'diggCount
'), expected_type=int),
143 'repost_count
': traverse_obj(details, ('stats
', 'shareCount
'), expected_type=int),
144 'comment_count
': traverse_obj(details, ('stats
', 'commentCount
'), expected_type=int),