]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/detik.py
[extractor/detik] Add extractor (#4284)
[yt-dlp.git] / yt_dlp / extractor / detik.py
CommitLineData
8f47b39b
H
1from .common import InfoExtractor
2from ..utils import merge_dicts, str_or_none
3
4
class Detik20IE(InfoExtractor):
    """Extractor for video pages on 20.detik.com."""

    IE_NAME = '20.detik.com'
    # The negative lookahead rejects URLs whose first path segment starts
    # with "program"; the last path segment is captured as the (display) id.
    _VALID_URL = r'https?://20\.detik\.com/((?!program)[\w-]+)/[\d-]+/(?P<id>[\w-]+)'
    _TESTS = [{
        # detikflash
        'url': 'https://20.detik.com/detikflash/20220705-220705098/zulhas-klaim-sukses-turunkan-harga-migor-jawa-bali',
        'info_dict': {
            'id': '220705098',
            'ext': 'mp4',
            'duration': 157,
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/05/bfe0384db04f4bbb9dd5efc869c5d4b1-20220705164334-0s.jpg?w=650&q=80',
            'description': 'md5:ac18dcee5b107abbec1ed46e0bf400e3',
            'title': 'Zulhas Klaim Sukses Turunkan Harga Migor Jawa-Bali',
            'tags': ['zulkifli hasan', 'menteri perdagangan', 'minyak goreng'],
            'timestamp': 1657039548,
            'upload_date': '20220705'
        }
    }, {
        # e-flash
        'url': 'https://20.detik.com/e-flash/20220705-220705109/ahli-level-ppkm-jadi-payung-strategi-protokol-kesehatan',
        'info_dict': {
            'id': '220705109',
            'ext': 'mp4',
            'tags': ['ppkm jabodetabek', 'dicky budiman', 'ppkm'],
            'upload_date': '20220705',
            'duration': 110,
            'title': 'Ahli: Level PPKM Jadi Payung Strategi Protokol Kesehatan',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/05/Ahli-_Level_PPKM_Jadi_Payung_Strat_jOgUMCN-20220705182313-custom.jpg?w=650&q=80',
            'description': 'md5:4eb825a9842e6bdfefd66f47b364314a',
            'timestamp': 1657045255,
        }
    }, {
        # otobuzz
        'url': 'https://20.detik.com/otobuzz/20220704-220704093/mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
        'info_dict': {
            'id': '220704093',
            'ext': 'mp4',
            'tags': ['cicilan mobil', 'mitsubishi pajero sport', 'mitsubishi', 'pajero sport'],
            'timestamp': 1656951521,
            'duration': 83,
            'upload_date': '20220704',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/04/5d6187e402ec4a91877755a5886ff5b6-20220704161859-0s.jpg?w=650&q=80',
            'description': 'md5:9b2257341b6f375cdcf90106146d5ffb',
            'title': 'Mulai Rp 10 Jutaan! Ini Skema Kredit Mitsubishi Pajero Sport',
        }
    }, {
        # sport-buzz
        'url': 'https://20.detik.com/sport-buzz/20220704-220704054/crash-crash-horor-di-paruh-pertama-motogp-2022',
        'info_dict': {
            'id': '220704054',
            'ext': 'mp4',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/04/6b172c6fb564411996ea145128315630-20220704090746-0s.jpg?w=650&q=80',
            'title': 'Crash-crash Horor di Paruh Pertama MotoGP 2022',
            'description': 'md5:fbcc6687572ad7d16eb521b76daa50e4',
            'timestamp': 1656925591,
            'duration': 107,
            'tags': ['marc marquez', 'fabio quartararo', 'francesco bagnaia', 'motogp crash', 'motogp 2022'],
            'upload_date': '20220704',
        }
    }, {
        # adu-perspektif
        'url': 'https://20.detik.com/adu-perspektif/20220518-220518144/24-tahun-reformasi-dan-alarm-demokrasi-dari-filipina',
        'info_dict': {
            'id': '220518144',
            'ext': 'mp4',
            'title': '24 Tahun Reformasi dan Alarm Demokrasi dari Filipina',
            'upload_date': '20220518',
            'timestamp': 1652913823,
            'duration': 185.0,
            'tags': ['politik', 'adu perspektif', 'indonesia', 'filipina', 'demokrasi'],
            'description': 'md5:8eaaf440b839c3d02dca8c9bbbb099a9',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/05/18/adpers_18_mei_compressed-20220518230458-custom.jpg?w=650&q=80',
        }
    }, {
        # sosok
        'url': 'https://20.detik.com/sosok/20220702-220703032/resa-boenard-si-princess-bantar-gebang',
        'info_dict': {
            'id': '220703032',
            'ext': 'mp4',
            'timestamp': 1656824438,
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/02/SOSOK_BGBJ-20220702191138-custom.jpg?w=650&q=80',
            'title': 'Resa Boenard Si \'Princess Bantar Gebang\'',
            'description': 'md5:84ea66306a0285330de6a13fc6218b78',
            'tags': ['sosok', 'sosok20d', 'bantar gebang', 'bgbj', 'resa boenard', 'bantar gebang bgbj', 'bgbj bantar gebang', 'sosok bantar gebang', 'sosok bgbj', 'bgbj resa boenard'],
            'upload_date': '20220703',
            'duration': 650,
        }
    }, {
        # viral
        'url': 'https://20.detik.com/viral/20220603-220603135/merasakan-bus-imut-tanpa-pengemudi-muter-muter-di-kawasan-bsd-city',
        'info_dict': {
            'id': '220603135',
            'ext': 'mp4',
            'description': 'md5:4771fe101aa303edb829c59c26f9e7c6',
            'timestamp': 1654304305,
            'title': 'Merasakan Bus Imut Tanpa Pengemudi, Muter-muter di Kawasan BSD City',
            'tags': ['viral', 'autonomous vehicle', 'electric', 'shuttle bus'],
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/06/03/VIRAL_BUS_NO_SUPIR-20220604004707-custom.jpg?w=650&q=80',
            'duration': 593,
            'upload_date': '20220604',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single 20.detik.com video page.

        The page's JSON-LD block supplies the base metadata; the media URL is
        read from the inline ``videoUrl: "..."`` assignment and handed to the
        m3u8 helper. The video id and the tags come from ``<meta>`` tags.
        Values from JSON-LD take precedence because it is passed first to
        ``merge_dicts``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        json_ld_data = self._search_json_ld(webpage, display_id)

        video_url = self._html_search_regex(
            r'videoUrl\s*:\s*"(?P<video_url>[^"]+)', webpage, 'videoUrl',
            group='video_url')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            video_url, display_id, ext='mp4')

        # ''.split(',') yields [''], so a missing or empty keywords tag used
        # to produce a single bogus empty tag. Drop empty segments and strip
        # surrounding whitespace instead.
        keywords = str_or_none(self._html_search_meta(
            ['keywords', 'keyword', 'dtk:keywords'], webpage)) or ''
        tags = [tag.strip() for tag in keywords.split(',') if tag.strip()]

        return merge_dicts(json_ld_data, {
            'id': self._html_search_meta('video_id', webpage),
            'formats': formats,
            'subtitles': subtitles,
            # None rather than [] when no keyword survives filtering;
            # NOTE(review): merge_dicts is expected to skip None values, so
            # the key is then simply absent - confirm against utils.
            'tags': tags or None,
        })