]>
jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/tiktok.py
2 from __future__
import unicode_literals
3 from datetime
import datetime
5 from .common
import InfoExtractor
14 class TikTokBaseIE(InfoExtractor
):
15 def _extract_aweme(self
, video_data
, webpage
):
17 video_data
, lambda x
: x
['videoData']['itemInfos'], dict)
18 author_info
= try_get(
19 video_data
, lambda x
: x
['videoData']['authorInfos'], dict)
20 share_info
= try_get(video_data
, lambda x
: x
['shareMeta'], dict)
22 unique_id
= str_or_none(author_info
.get('uniqueId'))
23 timestamp
= try_get(video_info
, lambda x
: int(x
['createTime']), int)
24 date
= datetime
.fromtimestamp(timestamp
).strftime('%Y%m%d')
26 height
= try_get(video_info
, lambda x
: x
['video']['videoMeta']['height'], int)
27 width
= try_get(video_info
, lambda x
: x
['video']['videoMeta']['width'], int)
30 'url': video_info
.get('thumbnail') or self
._og
_search
_thumbnail
(webpage
),
37 'url': try_get(video_info
, lambda x
: x
['video']['urls'][0]),
44 'comment_count': int_or_none(video_info
.get('commentCount')),
45 'duration': try_get(video_info
, lambda x
: x
['video']['videoMeta']['duration'], int),
47 'id': str_or_none(video_info
.get('id')),
48 'like_count': int_or_none(video_info
.get('diggCount')),
49 'repost_count': int_or_none(video_info
.get('shareCount')),
50 'thumbnail': try_get(video_info
, lambda x
: x
['covers'][0]),
51 'timestamp': timestamp
,
53 'title': str_or_none(share_info
.get('title')) or self
._og
_search
_title
(webpage
),
54 'creator': str_or_none(author_info
.get('nickName')),
55 'uploader': unique_id
,
56 'uploader_id': str_or_none(author_info
.get('userId')),
57 'uploader_url': 'https://www.tiktok.com/@' + unique_id
,
58 'thumbnails': thumbnails
,
60 'webpage_url': self
._og
_search
_url
(webpage
),
61 'description': str_or_none(video_info
.get('text')) or str_or_none(share_info
.get('desc')),
67 class TikTokIE(TikTokBaseIE
):
68 _VALID_URL
= r
'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'
71 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
72 'md5': '34a7543afd5a151b0840ba6736fb633b',
75 'creator': 'facestoriesbyleenabh',
76 'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95',
81 'id': '6748451240264420610',
84 'thumbnail': r
're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
86 'timestamp': 1571246252,
87 'title': 'facestoriesbyleenabh on TikTok',
88 'upload_date': '20191016',
89 'uploader': 'leenabhushan',
90 'uploader_id': '6691488002098119685',
91 'uploader_url': r
're:https://www.tiktok.com/@leenabhushan',
92 'webpage_url': r
're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610',
96 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
97 'md5': '06b9800d47d5fe51a19e322dd86e61c9',
101 'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
106 'id': '6742501081818877190',
109 'thumbnail': r
're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
111 'timestamp': 1569860870,
112 'title': 'patroX on TikTok',
113 'upload_date': '20190930',
114 'uploader': 'patroxofficial',
115 'uploader_id': '18702747',
116 'uploader_url': r
're:https://www.tiktok.com/@patroxofficial',
117 'webpage_url': r
're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190',
def _real_extract(self, url):
    """Extract metadata for a single TikTok video page.

    Downloads the page at ``url``, pulls the JSON payload embedded in the
    ``__NEXT_DATA__`` script tag, and hands its ``props.pageProps`` object
    to ``_extract_aweme`` when that object reports ``statusCode == 0``.

    Args:
        url: Video page URL; the video id is taken from it via ``_match_id``.

    Returns:
        Whatever ``_extract_aweme(video_data, webpage)`` returns.

    Raises:
        ExtractorError: if the page props are missing, are not a dict, or
            carry a ``statusCode`` other than 0.
    """
    video_id = self._match_id(url)

    webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
    json_string = self._search_regex(
        r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
        webpage, 'json_string', group='json_string_ld')
    json_data = self._parse_json(json_string, video_id)
    # The lookup is wrapped in try_get because the path may be absent or of
    # the wrong type; fall back to an empty dict so that case reaches the
    # ExtractorError below instead of failing with AttributeError on a
    # non-dict result (presumably None -- confirm against try_get).
    video_data = try_get(
        json_data, lambda x: x['props']['pageProps'], expected_type=dict) or {}

    # Check statusCode for success
    if video_data.get('statusCode') == 0:
        return self._extract_aweme(video_data, webpage)

    raise ExtractorError('Video not available', video_id=video_id)