]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/tiktok.py
Merge branch 'tiktok' of https://github.com/skyme5/youtube-dl into skyme5-tiktok
[yt-dlp.git] / youtube_dlc / extractor / tiktok.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3 from datetime import datetime
4
5 from .common import InfoExtractor
6 from ..utils import (
7 ExtractorError,
8 int_or_none,
9 str_or_none,
10 try_get
11 )
12
13
class TikTokBaseIE(InfoExtractor):
    """Base class holding the metadata-building logic shared by TikTok extractors."""

    def _extract_aweme(self, video_data, webpage):
        """Build the info dict for one video.

        video_data is expected to be a mapping with a 'videoData' entry
        (containing 'itemInfos' and 'authorInfos') and a 'shareMeta' entry.
        Any of those may be missing; the corresponding fields then come out
        as None instead of aborting the extraction.

        webpage is the HTML of the video page; it is used for the OpenGraph
        fallbacks (thumbnail, title) and for the canonical page URL.

        Raises ExtractorError when no video URL can be found, because no
        usable format can be offered in that case.
        """
        # try_get may hand back None when the path is absent or of the wrong
        # type; fall back to empty dicts so the .get() calls below are safe.
        video_info = try_get(
            video_data, lambda x: x['videoData']['itemInfos'], dict) or {}
        author_info = try_get(
            video_data, lambda x: x['videoData']['authorInfos'], dict) or {}
        share_info = try_get(video_data, lambda x: x['shareMeta'], dict) or {}

        video_id = str_or_none(video_info.get('id'))

        video_url = try_get(video_info, lambda x: x['video']['urls'][0])
        if not video_url:
            raise ExtractorError('Unable to extract video URL', video_id=video_id)

        unique_id = str_or_none(author_info.get('uniqueId'))

        # int_or_none tolerates missing and non-numeric values, whereas a bare
        # int() inside try_get could let a ValueError escape.
        timestamp = int_or_none(video_info.get('createTime'))
        upload_date = None
        if timestamp is not None:
            try:
                # Use UTC so the date does not depend on the local timezone
                # of the machine running the extractor.
                upload_date = datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                # Out-of-range timestamp: leave the date unset.
                upload_date = None

        height = try_get(video_info, lambda x: x['video']['videoMeta']['height'], int)
        width = try_get(video_info, lambda x: x['video']['videoMeta']['width'], int)

        # Only advertise a thumbnail entry when there is an actual URL for it.
        thumbnails = []
        thumbnail_url = video_info.get('thumbnail') or self._og_search_thumbnail(webpage)
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': width,
                'height': height,
            })

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'height': height,
            'width': width,
        }]

        return {
            'comment_count': int_or_none(video_info.get('commentCount')),
            'duration': try_get(video_info, lambda x: x['video']['videoMeta']['duration'], int),
            'height': height,
            'id': video_id,
            'like_count': int_or_none(video_info.get('diggCount')),
            'repost_count': int_or_none(video_info.get('shareCount')),
            'thumbnail': try_get(video_info, lambda x: x['covers'][0]),
            'timestamp': timestamp,
            'width': width,
            'title': str_or_none(share_info.get('title')) or self._og_search_title(webpage),
            'creator': str_or_none(author_info.get('nickName')),
            'uploader': unique_id,
            'uploader_id': str_or_none(author_info.get('userId')),
            # Without a unique id there is no profile URL to build.
            'uploader_url': ('https://www.tiktok.com/@' + unique_id) if unique_id else None,
            'thumbnails': thumbnails,
            'upload_date': upload_date,
            'webpage_url': self._og_search_url(webpage),
            'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')),
            'ext': 'mp4',
            'formats': formats,
        }
65
66
class TikTokIE(TikTokBaseIE):
    """Extractor for single TikTok video pages of the form /@<user>/video/<id>."""
    _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
        'md5': '34a7543afd5a151b0840ba6736fb633b',
        'info_dict': {
            'comment_count': int,
            'creator': 'facestoriesbyleenabh',
            'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95',
            'duration': 13,
            'ext': 'mp4',
            'formats': list,
            'height': 1280,
            'id': '6748451240264420610',
            'like_count': int,
            'repost_count': int,
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'thumbnails': list,
            'timestamp': 1571246252,
            'title': 'facestoriesbyleenabh on TikTok',
            'upload_date': '20191016',
            'uploader': 'leenabhushan',
            'uploader_id': '6691488002098119685',
            'uploader_url': r're:https://www.tiktok.com/@leenabhushan',
            'webpage_url': r're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610',
            'width': 720,
        }
    }, {
        'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
        'md5': '06b9800d47d5fe51a19e322dd86e61c9',
        'info_dict': {
            'comment_count': int,
            'creator': 'patroX',
            'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'duration': 27,
            'ext': 'mp4',
            'formats': list,
            'height': 960,
            'id': '6742501081818877190',
            'like_count': int,
            'repost_count': int,
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'thumbnails': list,
            'timestamp': 1569860870,
            'title': 'patroX on TikTok',
            'upload_date': '20190930',
            'uploader': 'patroxofficial',
            'uploader_id': '18702747',
            'uploader_url': r're:https://www.tiktok.com/@patroxofficial',
            'webpage_url': r're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190',
            'width': 540,
        }
    }]

    def _real_extract(self, url):
        """Download the video page and turn its embedded JSON into an info dict.

        The page's __NEXT_DATA__ JSON blob is located with a regex and parsed,
        and its props.pageProps mapping is handed to _extract_aweme when the
        reported statusCode is 0.

        Raises ExtractorError when pageProps is missing or its statusCode is
        anything other than 0.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
        # [^>]* (rather than [^>]+) also matches a script tag that carries no
        # further attributes after type="application/json".
        json_string = self._search_regex(
            r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]*>\s*(?P<json_string_ld>[^<]+)',
            webpage, 'json_string', group='json_string_ld')
        json_data = self._parse_json(json_string, video_id)
        # Fall back to an empty dict so a missing pageProps ends in the
        # ExtractorError below instead of an AttributeError on None.
        video_data = try_get(json_data, lambda x: x['props']['pageProps'], expected_type=dict) or {}

        # Check statusCode for success
        if video_data.get('statusCode') != 0:
            raise ExtractorError('Video not available', video_id=video_id)

        return self._extract_aweme(video_data, webpage)
134 return self._extract_aweme(video_data, webpage)
135
136 raise ExtractorError('Video not available', video_id=video_id)