]>
Commit | Line | Data |
---|---|---|
1ead840d KS |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
4b6d03ed | 3 | from datetime import datetime |
1ead840d KS |
4 | |
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
ce18a19b | 7 | ExtractorError, |
1ead840d KS |
8 | int_or_none, |
9 | str_or_none, | |
4b6d03ed | 10 | try_get |
1ead840d KS |
11 | ) |
12 | ||
13 | ||
class TikTokBaseIE(InfoExtractor):
    """Base class holding the metadata extraction shared by TikTok extractors."""

    def _extract_aweme(self, props_data, webpage, url):
        """Build the info dict for a single video.

        props_data -- the ``props`` object taken from the page's JSON; the
                      video metadata is read from ``pageProps.itemInfo`` and
                      the tracking id from ``initialProps.$wid``.
        webpage    -- HTML of the video page, used for the Open Graph
                      fallbacks (thumbnail, title, canonical URL).
        url        -- accepted for interface compatibility; the value is not
                      used (the ``Referer`` header is the media URL).

        Returns a dict with the video metadata, a single mp4 format and the
        HTTP headers to send when fetching the media.
        """
        video_data = try_get(props_data, lambda x: x['pageProps'], expected_type=dict)
        # Every lookup falls back to an empty dict so that a missing section
        # yields missing fields instead of an AttributeError on ``.get``.
        video_info = try_get(
            video_data, lambda x: x['itemInfo']['itemStruct'], dict) or {}
        author_info = try_get(
            video_data, lambda x: x['itemInfo']['itemStruct']['author'], dict) or {}
        share_info = try_get(video_data, lambda x: x['itemInfo']['shareMeta'], dict) or {}

        unique_id = str_or_none(author_info.get('uniqueId'))
        timestamp = int_or_none(video_info.get('createTime'))
        # Derive the date in UTC so the result does not depend on the local
        # timezone of the machine, and skip it when there is no timestamp.
        upload_date = None
        if timestamp is not None:
            upload_date = datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')

        height = try_get(video_info, lambda x: x['video']['height'], int)
        width = try_get(video_info, lambda x: x['video']['width'], int)
        thumbnails = [{
            'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage),
            'width': width,
            'height': height,
        }]

        # Prefer the play address and fall back to the download address.
        media_url = (
            try_get(video_info, lambda x: x['video']['playAddr'])
            or try_get(video_info, lambda x: x['video']['downloadAddr']))
        formats = [{
            'url': media_url,
            'ext': 'mp4',
            'height': height,
            'width': width,
        }]

        tracker = try_get(props_data, lambda x: x['initialProps']['$wid'])
        return {
            'comment_count': int_or_none(video_info.get('commentCount')),
            'duration': try_get(video_info, lambda x: x['video']['videoMeta']['duration'], int),
            'height': height,
            'id': str_or_none(video_info.get('id')),
            'like_count': int_or_none(video_info.get('diggCount')),
            'repost_count': int_or_none(video_info.get('shareCount')),
            'thumbnail': try_get(video_info, lambda x: x['covers'][0]),
            'timestamp': timestamp,
            'width': width,
            'title': str_or_none(share_info.get('title')) or self._og_search_title(webpage),
            'creator': str_or_none(author_info.get('nickName')),
            'uploader': unique_id,
            'uploader_id': str_or_none(author_info.get('userId')),
            # Only build the profile URL when the account name is known.
            'uploader_url': ('https://www.tiktok.com/@' + unique_id) if unique_id else None,
            'thumbnails': thumbnails,
            'upload_date': upload_date,
            'webpage_url': self._og_search_url(webpage),
            'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')),
            'ext': 'mp4',
            'formats': formats,
            'http_headers': {
                'Referer': media_url,
                'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker),
            }
        }
ce18a19b S |
76 | |
77 | ||
class TikTokIE(TikTokBaseIE):
    """Extractor for single TikTok video pages (``/@user/video/<id>``)."""

    _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
        'md5': '34a7543afd5a151b0840ba6736fb633b',
        'info_dict': {
            'comment_count': int,
            'creator': 'facestoriesbyleenabh',
            'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95',
            'duration': 13,
            'ext': 'mp4',
            'formats': list,
            'height': 1280,
            'id': '6748451240264420610',
            'like_count': int,
            'repost_count': int,
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'thumbnails': list,
            'timestamp': 1571246252,
            'title': 'facestoriesbyleenabh on TikTok',
            'upload_date': '20191016',
            'uploader': 'leenabhushan',
            'uploader_id': '6691488002098119685',
            'uploader_url': r're:https://www.tiktok.com/@leenabhushan',
            'webpage_url': r're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610',
            'width': 720,
        }
    }, {
        'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
        'md5': '06b9800d47d5fe51a19e322dd86e61c9',
        'info_dict': {
            'comment_count': int,
            'creator': 'patroX',
            'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
            'duration': 27,
            'ext': 'mp4',
            'formats': list,
            'height': 960,
            'id': '6742501081818877190',
            'like_count': int,
            'repost_count': int,
            'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
            'thumbnails': list,
            'timestamp': 1569860870,
            'title': 'patroX on TikTok',
            'upload_date': '20190930',
            'uploader': 'patroxofficial',
            'uploader_id': '18702747',
            'uploader_url': r're:https://www.tiktok.com/@patroxofficial',
            'webpage_url': r're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190',
            'width': 540,
        }
    }]

    def _real_extract(self, url):
        """Download the video page and return the info dict for the video.

        Raises ExtractorError when the page reports the video as private
        (status code 10216) or when no successful status code is found.
        """
        video_id = self._match_id(url)

        # If we only call once, we get a 403 when downloading the video.
        self._download_webpage(url, video_id)
        webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
        json_string = self._search_regex(
            r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
            webpage, 'json_string', group='json_string_ld')
        json_data = self._parse_json(json_string, video_id)
        props_data = try_get(json_data, lambda x: x['props'], expected_type=dict)

        # Check statusCode for success. The lookup is guarded so that a page
        # without 'props' or 'pageProps' ends in the generic error below
        # rather than an AttributeError on None.
        status = try_get(props_data, lambda x: x['pageProps']['statusCode'])
        if status == 0:
            return self._extract_aweme(props_data, webpage, url)
        elif status == 10216:
            raise ExtractorError('This video is private', expected=True)

        raise ExtractorError('Video not available', video_id=video_id)