]>
Commit | Line | Data |
---|---|---|
1ead840d KS |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | compat_str, | |
ce18a19b | 7 | ExtractorError, |
1ead840d KS |
8 | int_or_none, |
9 | str_or_none, | |
10 | try_get, | |
11 | url_or_none, | |
12 | ) | |
13 | ||
14 | ||
ce18a19b S |
class TikTokBaseIE(InfoExtractor):
    def _extract_aweme(self, data):
        """Build an info dict from one "aweme" (video) metadata dict.

        `data['video']` and `data['aweme_id']` are indexed directly, so both
        keys are required. All other fields are looked up through try_get /
        dict.get and passed through the *_or_none helpers.

        Returns a dict with the keys id, title, description, thumbnail,
        uploader, timestamp, comment_count, repost_count and formats.
        """
        video = data['video']
        description = str_or_none(try_get(data, lambda x: x['desc']))
        # Width and height are read once and applied to every format below.
        width = int_or_none(try_get(video, lambda x: x['width']))
        height = int_or_none(try_get(video, lambda x: x['height']))

        # Several address keys can list the same URL; format_urls remembers
        # what has already been added so each URL yields a single format.
        format_urls = set()
        formats = []
        for format_id in (
                'play_addr_lowbr', 'play_addr', 'play_addr_h264',
                'download_addr'):
            for candidate in try_get(
                    video, lambda x: x[format_id]['url_list'], list) or []:
                format_url = url_or_none(candidate)
                if not format_url or format_url in format_urls:
                    continue
                format_urls.add(format_url)
                formats.append({
                    'url': format_url,
                    'ext': 'mp4',
                    'height': height,
                    'width': width,
                })
        # NOTE(review): TikTokUserIE catches ExtractorError around this
        # method, presumably raised here when no formats were collected;
        # confirm against InfoExtractor._sort_formats.
        self._sort_formats(formats)

        thumbnail = url_or_none(try_get(
            video, lambda x: x['cover']['url_list'][0], compat_str))
        uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
        timestamp = int_or_none(data.get('create_time'))
        # Prefer the top-level comment count and fall back to the one nested
        # under 'statistics' when the former is missing or falsy.
        comment_count = int_or_none(data.get('comment_count')) or int_or_none(
            try_get(data, lambda x: x['statistics']['comment_count']))
        repost_count = int_or_none(try_get(
            data, lambda x: x['statistics']['share_count']))

        aweme_id = data['aweme_id']

        return {
            'id': aweme_id,
            # The uploader nickname doubles as the title; the id is used
            # when no nickname is available.
            'title': uploader or aweme_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'timestamp': timestamp,
            'comment_count': comment_count,
            'repost_count': repost_count,
            'formats': formats,
        }
ce18a19b S |
65 | |
66 | ||
class TikTokIE(TikTokBaseIE):
    # Matches single-video pages such as https://m.tiktok.com/v/<digits>.html
    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
    _TEST = {
        'url': 'https://m.tiktok.com/v/6606727368545406213.html',
        'md5': 'd584b572e92fcd48888051f238022420',
        'info_dict': {
            'id': '6606727368545406213',
            'ext': 'mp4',
            'title': 'Zureeal',
            'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
            'thumbnail': r're:^https?://.*~noop.image',
            'uploader': 'Zureeal',
            'timestamp': 1538248586,
            'upload_date': '20180929',
            'comment_count': int,
            'repost_count': int,
        }
    }

    def _real_extract(self, url):
        """Download the video page and extract the metadata embedded in it."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        # The metadata is embedded in the page as a `data = {...};` assignment.
        raw_json = self._search_regex(
            r'\bdata\s*=\s*({.+?})\s*;', page, 'data')
        aweme = self._parse_json(raw_json, video_id)
        return self._extract_aweme(aweme)
92 | ||
93 | ||
class TikTokUserIE(TikTokBaseIE):
    # Matches user share pages such as
    # https://m.tiktok.com/h5/share/usr/<digits>.html
    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
    _TEST = {
        'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
        'info_dict': {
            'id': '188294915489964032',
        },
        'playlist_mincount': 24,
    }

    def _real_extract(self, url):
        """Return a playlist of the videos found in the user's listing."""
        user_id = self._match_id(url)
        list_url = 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id
        listing = self._download_json(
            list_url, user_id, query={'_signature': '_'})

        videos = []
        for item in listing['aweme_list']:
            try:
                info = self._extract_aweme(item)
            except ExtractorError:
                # Best effort: leave out items that cannot be extracted
                # instead of failing the whole playlist.
                continue
            else:
                # Attribute each entry to the single-video extractor.
                info['extractor_key'] = TikTokIE.ie_key()
                videos.append(info)
        return self.playlist_result(videos, user_id)