]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/tiktok.py
2 from __future__
import unicode_literals
3 from datetime
import datetime
5 from .common
import InfoExtractor
14 class TikTokBaseIE(InfoExtractor
):
15 def _extract_aweme(self
, props_data
, webpage
, url
):
16 video_data
= try_get(props_data
, lambda x
: x
['pageProps'], expected_type
=dict)
18 video_data
, lambda x
: x
['itemInfo']['itemStruct'], dict)
19 author_info
= try_get(
20 video_data
, lambda x
: x
['itemInfo']['itemStruct']['author'], dict) or {}
21 share_info
= try_get(video_data
, lambda x
: x
['itemInfo']['shareMeta'], dict) or {}
23 unique_id
= str_or_none(author_info
.get('uniqueId'))
24 timestamp
= try_get(video_info
, lambda x
: int(x
['createTime']), int)
25 date
= datetime
.fromtimestamp(timestamp
).strftime('%Y%m%d')
27 height
= try_get(video_info
, lambda x
: x
['video']['height'], int)
28 width
= try_get(video_info
, lambda x
: x
['video']['width'], int)
31 'url': video_info
.get('thumbnail') or self
._og
_search
_thumbnail
(webpage
),
38 url
= try_get(video_info
, lambda x
: x
['video']['playAddr'])
40 url
= try_get(video_info
, lambda x
: x
['video']['downloadAddr'])
49 tracker
= try_get(props_data
, lambda x
: x
['initialProps']['$wid'])
51 'comment_count': int_or_none(video_info
.get('commentCount')),
52 'duration': try_get(video_info
, lambda x
: x
['video']['videoMeta']['duration'], int),
54 'id': str_or_none(video_info
.get('id')),
55 'like_count': int_or_none(video_info
.get('diggCount')),
56 'repost_count': int_or_none(video_info
.get('shareCount')),
57 'thumbnail': try_get(video_info
, lambda x
: x
['covers'][0]),
58 'timestamp': timestamp
,
60 'title': str_or_none(share_info
.get('title')) or self
._og
_search
_title
(webpage
),
61 'creator': str_or_none(author_info
.get('nickName')),
62 'uploader': unique_id
,
63 'uploader_id': str_or_none(author_info
.get('userId')),
64 'uploader_url': 'https://www.tiktok.com/@' + unique_id
,
65 'thumbnails': thumbnails
,
67 'webpage_url': self
._og
_search
_url
(webpage
),
68 'description': str_or_none(video_info
.get('text')) or str_or_none(share_info
.get('desc')),
73 'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker
, tracker
),
78 class TikTokIE(TikTokBaseIE
):
79 _VALID_URL
= r
'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'
82 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
83 'md5': '34a7543afd5a151b0840ba6736fb633b',
86 'creator': 'facestoriesbyleenabh',
87 'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95',
92 'id': '6748451240264420610',
95 'thumbnail': r
're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
97 'timestamp': 1571246252,
98 'title': 'facestoriesbyleenabh on TikTok',
99 'upload_date': '20191016',
100 'uploader': 'leenabhushan',
101 'uploader_id': '6691488002098119685',
102 'uploader_url': r
're:https://www.tiktok.com/@leenabhushan',
103 'webpage_url': r
're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610',
107 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
108 'md5': '06b9800d47d5fe51a19e322dd86e61c9',
110 'comment_count': int,
112 'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
117 'id': '6742501081818877190',
120 'thumbnail': r
're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
122 'timestamp': 1569860870,
123 'title': 'patroX on TikTok',
124 'upload_date': '20190930',
125 'uploader': 'patroxofficial',
126 'uploader_id': '18702747',
127 'uploader_url': r
're:https://www.tiktok.com/@patroxofficial',
128 'webpage_url': r
're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190',
def _real_extract(self, url):
    """Extract the info dict for a single TikTok video page.

    The page is downloaded, the JSON payload embedded in the
    ``__NEXT_DATA__`` script tag is parsed, and its ``props`` object is
    handed to ``_extract_aweme`` when ``pageProps.statusCode`` is 0.

    :param url: video page URL matching ``_VALID_URL``.
    :return: whatever ``_extract_aweme(props_data, webpage, url)`` returns.
    :raises ExtractorError: when the status code is missing or non-zero.
    """
    video_id = self._match_id(url)

    # If we only call once, we get a 403 when downloading the video.
    self._download_webpage(url, video_id)
    webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
    json_string = self._search_regex(
        r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
        webpage, 'json_string', group='json_string_ld')
    json_data = self._parse_json(json_string, video_id)
    props_data = try_get(json_data, lambda x: x['props'], expected_type=dict)

    # Check statusCode for success.  props_data is None when 'props' is
    # missing or not a dict, and 'pageProps' may be absent as well, so the
    # lookup goes through try_get instead of chained .get() calls; a
    # malformed payload then ends in ExtractorError, not AttributeError.
    status_code = try_get(props_data, lambda x: x['pageProps']['statusCode'])
    if status_code == 0:
        return self._extract_aweme(props_data, webpage, url)

    raise ExtractorError('Video not available', video_id=video_id)