]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/koo.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / koo.py
CommitLineData
9ada988b
AG
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    try_get,
)
6
7
class KooIE(InfoExtractor):
    """Extractor for video/audio posts ("koos") hosted on kooapp.com.

    The koo id is taken from the last path component of the URL and looked
    up through the site's JSON API; formats are built from the item's
    ``mediaMap`` entry (progressive MP4 and/or HLS).
    """

    _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
    _TESTS = [{  # Test for video in the comments
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
        'info_dict': {
            'id': '946c4189-bc2d-4524-b95b-43f641e2adde',
            'ext': 'mp4',
            'title': 'test for video in comment',
            'description': 'md5:daa77dc214add4da8b6ea7d2226776e7',
            'timestamp': 1632215195,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 7000,
            'upload_date': '20210921'
        },
        'params': {'skip_download': True}
    }, {  # Test for koo with long title
        'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361',
        'info_dict': {
            'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361',
            'ext': 'mp4',
            'title': 'md5:47a71c2337295330c5a19a8af1bbf450',
            'description': 'md5:06a6a84e9321499486dab541693d8425',
            'timestamp': 1632106884,
            'uploader_id': 'laxman_kumarDBFEC',
            'uploader': 'Laxman Kumar 🇮🇳',
            'duration': 46000,
            'upload_date': '20210920'
        },
        'params': {'skip_download': True}
    }, {  # Test for audio
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
        'info_dict': {
            'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
            'ext': 'mp4',
            'title': 'Test for audio',
            'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8',
            'timestamp': 1632211634,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 214000,
            'upload_date': '20210921'
        },
        'params': {'skip_download': True}
    }, {  # Test for video
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
        'info_dict': {
            'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
            'ext': 'mp4',
            'title': 'Test for video',
            'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500',
            'timestamp': 1632211468,
            'uploader_id': 'ytdlpTestAccount',
            'uploader': 'yt-dlpTestAccount',
            'duration': 14000,
            'upload_date': '20210921'
        },
        'params': {'skip_download': True}
    }, {  # Test for link
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a',
        'skip': 'No video/audio found at the provided url.',
        'info_dict': {
            'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a',
            'title': 'Test for link',
            'ext': 'none',
        },
    }, {  # Test for images
        'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
        'skip': 'No video/audio found at the provided url.',
        'info_dict': {
            'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
            'title': 'Test for images',
            'ext': 'none',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the koo addressed by *url*.

        Raises ExtractorError when the API response does not contain the
        requested koo. When the koo exists but carries no playable media,
        the "no formats" condition is reported via ``raise_no_formats``.
        """
        video_id = self._match_id(url)
        api_response = self._download_json(
            f'https://www.kooapp.com/apiV1/ku/{video_id}?limit=20&offset=0&showSimilarKoos=true',
            video_id)
        # A missing/empty 'parentContent' is treated the same as "koo not found"
        # below instead of surfacing as a KeyError/TypeError.
        data_json = api_response.get('parentContent') or []

        # The response lists several content groups; pick the one whose first
        # item is the requested koo. The explicit default keeps a bare
        # StopIteration from leaking out when nothing matches.
        item_json = next(
            (content['items'][0] for content in data_json
             if try_get(content, lambda x: x['items'][0]['id']) == video_id),
            None)
        if item_json is None:
            raise ExtractorError(
                'Unable to find the requested koo in the API response', video_id=video_id)

        # Posts without any media may lack 'mediaMap'; fall through to the
        # "no formats" handling rather than crashing with a KeyError.
        media_json = item_json.get('mediaMap') or {}
        formats = []

        mp4_url = media_json.get('videoMp4')
        video_m3u8_url = media_json.get('videoHls')
        if mp4_url:
            formats.append({
                'url': mp4_url,
                'ext': 'mp4',
            })
        if video_m3u8_url:
            formats.extend(self._extract_m3u8_formats(video_m3u8_url, video_id, fatal=False, ext='mp4'))
        if not formats:
            self.raise_no_formats('No video/audio found at the provided url.', expected=True)

        title = clean_html(item_json.get('title'))
        transliteration = clean_html(item_json.get('enTransliteration'))
        # Join only the parts that are present so an absent field does not end
        # up as the literal text "None" inside the description.
        description = '\n\n'.join(filter(None, (title, transliteration))) or None

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': item_json.get('createdAt'),
            'uploader_id': item_json.get('handle'),
            'uploader': item_json.get('name'),
            # NOTE(review): the expected values in _TESTS (e.g. 7000 for a short
            # test clip) suggest the API reports milliseconds, while the info
            # dict's 'duration' is presumably in seconds - confirm before scaling.
            'duration': media_json.get('duration'),
            'formats': formats,
        }