jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/douyin.py
[adobepass] Add MSO Sling TV (#596)
[yt-dlp.git] / yt_dlp / extractor / douyin.py
1 # coding: utf-8
2
3 from ..utils import (
4 int_or_none,
5 traverse_obj,
6 url_or_none,
7 )
8 from .common import (
9 InfoExtractor,
10 compat_urllib_parse_unquote,
11 )
12
13
class DouyinIE(InfoExtractor):
    """Extractor for single-video pages on douyin.com.

    The watch page embeds a percent-encoded JSON document inside a
    ``<script id="RENDER_DATA">`` tag. Formats and metadata are read from the
    first ``aweme.detail`` object found in that document.
    """

    _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.douyin.com/video/6961737553342991651',
        'md5': '10523312c8b8100f353620ac9dc8f067',
        'info_dict': {
            'id': '6961737553342991651',
            'ext': 'mp4',
            'title': '#杨超越 小小水手带你去远航❤️',
            'uploader': '杨超越',
            'upload_date': '20210513',
            'timestamp': 1620905839,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6982497745948921092',
        'md5': 'd78408c984b9b5102904cf6b6bc2d712',
        'info_dict': {
            'id': '6982497745948921092',
            'ext': 'mp4',
            'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
            'uploader': '杨超越工作室',
            'upload_date': '20210708',
            'timestamp': 1625739481,
            'uploader_id': '408654318141572',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6953975910773099811',
        'md5': '72e882e24f75064c218b76c8b713c185',
        'info_dict': {
            'id': '6953975910773099811',
            'ext': 'mp4',
            'title': '#一起看海 出现在你的夏日里',
            'uploader': '杨超越',
            'upload_date': '20210422',
            'timestamp': 1619098692,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6950251282489675042',
        'md5': 'b4db86aec367ef810ddd38b1737d2fed',
        'info_dict': {
            'id': '6950251282489675042',
            'ext': 'mp4',
            'title': '哈哈哈,成功了哈哈哈哈哈哈',
            'uploader': '杨超越',
            'upload_date': '20210412',
            'timestamp': 1618231483,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://www.douyin.com/video/6963263655114722595',
        'md5': '1abe1c477d05ee62efb40bf2329957cf',
        'info_dict': {
            'id': '6963263655114722595',
            'ext': 'mp4',
            'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
            'uploader': '杨超越',
            'upload_date': '20210517',
            'timestamp': 1621261163,
            'uploader_id': '110403406559',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, decodes the embedded RENDER_DATA JSON and reads
        thumbnails, formats and metadata from its ``aweme.detail`` object.
        Optional fields that are absent from the data are returned as None.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The script body is URL-quoted JSON ("%7B" ... "%7D" are the quoted
        # braces), so it is unquoted before being parsed.
        render_data = self._parse_json(
            self._search_regex(
                r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
                webpage, 'render data'),
            video_id, transform_source=compat_urllib_parse_unquote)
        # Only accept a dict, and fall back to an empty one, so that a page
        # without a usable aweme detail does not raise AttributeError on the
        # lookups below.
        details = traverse_obj(
            render_data, (..., 'aweme', 'detail'),
            get_all=False, expected_type=dict) or {}

        thumbnails = [
            {'url': self._proto_relative_url(thumbnail_url)}
            for thumbnail_url in traverse_obj(
                details, ('video', ('cover', 'dynamicCover', 'originCover')),
                expected_type=url_or_none, default=[])]

        # Properties shared by every format of this video.
        common = {
            'width': traverse_obj(details, ('video', 'width'), expected_type=int),
            'height': traverse_obj(details, ('video', 'height'), expected_type=int),
            'ext': 'mp4',
        }
        formats = [
            {**common, 'url': self._proto_relative_url(play_url)}
            for play_url in traverse_obj(
                details, ('video', 'playAddr', ..., 'src'),
                expected_type=url_or_none, default=[])
            if play_url]
        self._remove_duplicate_formats(formats)

        download_url = traverse_obj(details, ('download', 'url'), expected_type=url_or_none)
        if download_url:
            formats.append({
                **common,
                'format_id': 'download',
                'url': self._proto_relative_url(download_url),
                'quality': 1,
            })
        self._sort_formats(formats)

        # Resolve the uploader's secUid first: formatting None straight into
        # the URL template would produce ".../user/None".
        sec_uid = traverse_obj(details, ('authorInfo', 'secUid'), expected_type=str)

        return {
            'id': video_id,
            'title': details.get('desc') or self._html_search_meta('title', webpage),
            'formats': formats,
            'thumbnails': thumbnails,
            'uploader': traverse_obj(details, ('authorInfo', 'nickname'), expected_type=str),
            'uploader_id': traverse_obj(details, ('authorInfo', 'uid'), expected_type=str),
            'uploader_url': ('https://www.douyin.com/user/%s' % sec_uid) if sec_uid else None,
            'timestamp': int_or_none(details.get('createTime')),
            'duration': traverse_obj(details, ('video', 'duration'), expected_type=int),
            'view_count': traverse_obj(details, ('stats', 'playCount'), expected_type=int),
            'like_count': traverse_obj(details, ('stats', 'diggCount'), expected_type=int),
            'repost_count': traverse_obj(details, ('stats', 'shareCount'), expected_type=int),
            'comment_count': traverse_obj(details, ('stats', 'commentCount'), expected_type=int),
        }