]>
Commit | Line | Data |
---|---|---|
1ead840d KS |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
4b6d03ed | 3 | from datetime import datetime |
1ead840d KS |
4 | |
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
ce18a19b | 7 | ExtractorError, |
1ead840d KS |
8 | int_or_none, |
9 | str_or_none, | |
4b6d03ed | 10 | try_get |
1ead840d KS |
11 | ) |
12 | ||
13 | ||
class TikTokBaseIE(InfoExtractor):
    """Base class holding the info-dict builder used by TikTok extractors."""

    def _extract_aweme(self, video_data, webpage, url):
        """Build the info dict for one video from the page's JSON payload.

        video_data -- dict expected to carry 'itemInfo' -> 'itemStruct'
                      (video and author metadata) and 'itemInfo' -> 'shareMeta'
        webpage    -- HTML of the video page, used for Open Graph fallbacks
        url        -- original page URL, sent as the Referer header

        Raises ExtractorError when no playable video URL is present.
        Missing optional metadata yields None values instead of exceptions.
        """
        # try_get returns None for a missing path; fall back to empty dicts so
        # the .get() lookups below yield None rather than raising AttributeError.
        video_info = try_get(
            video_data, lambda x: x['itemInfo']['itemStruct'], dict) or {}
        author_info = try_get(video_info, lambda x: x['author'], dict) or {}
        share_info = try_get(
            video_data, lambda x: x['itemInfo']['shareMeta'], dict) or {}

        video_id = str_or_none(video_info.get('id'))
        unique_id = str_or_none(author_info.get('uniqueId'))

        # int_or_none copes with missing and non-numeric values, whereas
        # int() inside try_get would let a ValueError escape.
        timestamp = int_or_none(video_info.get('createTime'))
        upload_date = None
        if timestamp is not None:
            try:
                # Use UTC so the date does not depend on the local timezone.
                upload_date = datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                # Timestamp outside the platform's supported range.
                upload_date = None

        height = try_get(video_info, lambda x: x['video']['height'], int)
        width = try_get(video_info, lambda x: x['video']['width'], int)

        # Only emit a thumbnail entry when a URL is actually available.
        thumbnails = []
        thumbnail_url = video_info.get('thumbnail') or self._og_search_thumbnail(webpage)
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': width,
                'height': height,
            })

        play_url = try_get(video_info, lambda x: x['video']['playAddr'])
        if not play_url:
            # A format without a URL cannot be downloaded; fail with a clear
            # extractor error instead of passing a None URL downstream.
            raise ExtractorError('Unable to extract video URL', video_id=video_id)
        formats = [{
            'url': play_url,
            'ext': 'mp4',
            'height': height,
            'width': width,
        }]

        return {
            'comment_count': int_or_none(video_info.get('commentCount')),
            'duration': try_get(video_info, lambda x: x['video']['videoMeta']['duration'], int),
            'height': height,
            'id': video_id,
            'like_count': int_or_none(video_info.get('diggCount')),
            'repost_count': int_or_none(video_info.get('shareCount')),
            'thumbnail': try_get(video_info, lambda x: x['covers'][0]),
            'timestamp': timestamp,
            'width': width,
            'title': str_or_none(share_info.get('title')) or self._og_search_title(webpage),
            'creator': str_or_none(author_info.get('nickName')),
            'uploader': unique_id,
            'uploader_id': str_or_none(author_info.get('userId')),
            # Avoid concatenating with None when the author handle is missing.
            'uploader_url': ('https://www.tiktok.com/@' + unique_id) if unique_id else None,
            'thumbnails': thumbnails,
            'upload_date': upload_date,
            'webpage_url': self._og_search_url(webpage),
            'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')),
            'ext': 'mp4',
            'formats': formats,
            'http_headers': {
                'Referer': url,
            }
        }
ce18a19b S |
68 | |
69 | ||
class TikTokIE(TikTokBaseIE):
    """Extractor for single TikTok video pages (tiktok.com/@user/video/<id>)."""

    _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
        'md5': '34a7543afd5a151b0840ba6736fb633b',
        'info_dict': {
            'comment_count': int,
            'creator': 'facestoriesbyleenabh',
            'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95',
            'duration': 13,
            'ext': 'mp4',
            'formats': list,
            'height': 1280,
            'id': '6748451240264420610',
            'like_count': int,
            'repost_count': int,
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'thumbnails': list,
            'timestamp': 1571246252,
            'title': 'facestoriesbyleenabh on TikTok',
            'upload_date': '20191016',
            'uploader': 'leenabhushan',
            'uploader_id': '6691488002098119685',
            'uploader_url': r're:https://www.tiktok.com/@leenabhushan',
            'webpage_url': r're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610',
            'width': 720,
        }
    }, {
        'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
        'md5': '06b9800d47d5fe51a19e322dd86e61c9',
        'info_dict': {
            'comment_count': int,
            'creator': 'patroX',
            'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'duration': 27,
            'ext': 'mp4',
            'formats': list,
            'height': 960,
            'id': '6742501081818877190',
            'like_count': int,
            'repost_count': int,
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'thumbnails': list,
            'timestamp': 1569860870,
            'title': 'patroX on TikTok',
            'upload_date': '20190930',
            'uploader': 'patroxofficial',
            'uploader_id': '18702747',
            'uploader_url': r're:https://www.tiktok.com/@patroxofficial',
            'webpage_url': r're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190',
            'width': 540,
        }
    }]

    def _real_extract(self, url):
        """Download the video page and build the info dict from its embedded JSON.

        Raises ExtractorError('Video not available') when the page's JSON
        lacks 'props' -> 'pageProps' or reports a non-zero statusCode.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
        # The page state is embedded as JSON in the __NEXT_DATA__ script tag.
        json_string = self._search_regex(
            r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
            webpage, 'json_string', group='json_string_ld')
        json_data = self._parse_json(json_string, video_id)
        # try_get returns None when the path is absent; use an empty dict so the
        # status check below falls through to the ExtractorError instead of
        # raising AttributeError on None.
        video_data = try_get(
            json_data, lambda x: x['props']['pageProps'], expected_type=dict) or {}

        # Check statusCode for success
        if video_data.get('statusCode') == 0:
            return self._extract_aweme(video_data, webpage, url)

        raise ExtractorError('Video not available', video_id=video_id)