jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/zingmp3.py
[go,viu] Extract subtitles from the m3u8 manifest (#3219)
[yt-dlp.git] / yt_dlp / extractor / zingmp3.py
CommitLineData
dcdb292f 1# coding: utf-8
c66bdc48
DHS
2from __future__ import unicode_literals
3
ecca4519
HTL
4import hashlib
5import hmac
6import urllib.parse
7
c66bdc48 8from .common import InfoExtractor
3d47ee0a 9from ..utils import (
3d47ee0a 10 int_or_none,
ecca4519 11 traverse_obj,
3d47ee0a 12)
c66bdc48
DHS
13
14
class ZingMp3BaseIE(InfoExtractor):
    """Shared logic for the Zing MP3 extractors.

    Builds signed API URLs, downloads the API data for a matched URL and
    converts a single API item into an info dict. Subclasses provide
    ``_VALID_URL`` (from ``_VALID_URL_TMPL``) and ``_process_data``.
    """

    # %s is filled by subclasses with the alternation of URL type slugs they handle
    _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/]+/(?P<id>\w+)(?:\.html|\?)'
    _GEO_COUNTRIES = ['VN']
    _DOMAIN = 'https://zingmp3.vn'
    # URL type slug (plus the internal 'lyric' / 'song_streaming' names) -> API endpoint path
    _SLUG_API = {
        'bai-hat': '/api/v2/page/get/song',
        'embed': '/api/v2/page/get/song',
        'video-clip': '/api/v2/page/get/video',
        'playlist': '/api/v2/page/get/playlist',
        'album': '/api/v2/page/get/playlist',
        'lyric': '/api/v2/lyric/get/lyric',
        'song_streaming': '/api/v2/song/get/streaming',
    }

    _API_KEY = '88265e23d4284f25963e6eedac8fbfa3'
    _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab'

    def _extract_item(self, item, song_id, type_url, fatal):
        """Build the info dict for one song or video item.

        item     -- item dict taken from the API response
        song_id  -- fallback ID used when the item has no 'encodeId'
        type_url -- URL type slug; 'video-clip' items carry their own
                    'streaming' data, every other type queries the
                    streaming endpoint
        fatal    -- when false, return None instead of raising if no
                    formats were found

        Returns the info dict, or None (non-fatal, no formats).
        """
        item_id = item.get('encodeId') or song_id
        title = item.get('title') or item.get('alias')

        if type_url == 'video-clip':
            source = item.get('streaming')
        else:
            api = self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id})
            source = self._download_json(api, video_id=item_id).get('data')

        formats = []
        for k, v in (source or {}).items():
            if not v:
                continue
            if k in ('mp4', 'hls'):
                # Video sources: a mapping of resolution label -> URL
                for res, video_url in v.items():
                    if not video_url:
                        continue
                    if k == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            video_url, item_id, 'mp4',
                            'm3u8_native', m3u8_id=k, fatal=False))
                    else:
                        formats.append({
                            'format_id': 'mp4-' + res,
                            'url': video_url,
                            'height': int_or_none(self._search_regex(
                                r'^(\d+)p', res, 'resolution', default=None)),
                        })
                continue
            elif v == 'VIP':
                # Placeholder value instead of a URL; nothing to download
                continue
            # Audio sources: the key is used as format ID and bitrate
            formats.append({
                'ext': 'mp3',
                'format_id': k,
                'tbr': int_or_none(k),
                'url': self._proto_relative_url(v),
                'vcodec': 'none',
            })
        if not formats:
            if not fatal:
                return
            msg = item.get('msg')
            if msg == 'Sorry, this content is not available in your country.':
                self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
            # Never raise with an empty message (it would be reported as "None")
            self.raise_no_formats(msg or 'No playable formats found', expected=True)
        self._sort_formats(formats)

        lyric = item.get('lyric')
        if not lyric:
            api = self.get_api_with_signature(name_api=self._SLUG_API.get('lyric'), param={'id': item_id})
            # Lyrics are optional metadata: a failed request must not abort
            # an extraction that already has formats
            info_lyric = self._download_json(api, video_id=item_id, fatal=False)
            lyric = traverse_obj(info_lyric or {}, ('data', 'file'))
        subtitles = {
            'origin': [{
                'url': lyric,
            }],
        } if lyric else None

        album = item.get('album') or {}

        return {
            'id': item_id,
            'title': title,
            'formats': formats,
            'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'),
            'subtitles': subtitles,
            'duration': int_or_none(item.get('duration')),
            'track': title,
            'artist': traverse_obj(item, 'artistsNames', 'artists_names'),
            'album': traverse_obj(album, 'name', 'title'),
            'album_artist': traverse_obj(album, 'artistsNames', 'artists_names'),
        }

    def _real_initialize(self):
        """Request the song endpoint once when no cookie source is configured.

        NOTE(review): presumably this makes the session pick up cookies the
        API expects (the request is labelled 'Updating cookies') -- confirm.
        """
        if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'):
            self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}),
                                  None, note='Updating cookies')

    def _real_extract(self, url):
        """Download the API data for the URL and hand it to ``_process_data``."""
        song_id, type_url = self._match_valid_url(url).group('id', 'type')
        api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id})
        return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url)

    def get_api_with_signature(self, name_api, param):
        """Return the full signed URL for an API endpoint.

        name_api -- endpoint path (a value of ``_SLUG_API``)
        param    -- query parameters; left unmodified

        The signature is the HMAC-SHA512 (keyed with ``_SECRET_KEY``) of the
        endpoint path followed by the SHA-256 hex digest of the parameters
        joined as ``key=value`` in sorted key order.
        """
        # Work on a copy so the caller's dict does not gain a 'ctime' entry
        param = {**param, 'ctime': '1'}
        sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest()
        data = {
            'apiKey': self._API_KEY,
            'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(),
            **param,
        }
        return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}'
c66bdc48
DHS
125
126
class ZingMp3IE(ZingMp3BaseIE):
    """Extractor for single songs, video clips and embedded songs."""

    IE_NAME = 'zingmp3'
    IE_DESC = 'zingmp3.vn'
    _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed'
    _TESTS = [
        {
            'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
            'md5': 'ead7ae13693b3205cbc89536a077daed',
            'info_dict': {
                'id': 'ZWZB9WAB',
                'title': 'Xa Mãi Xa',
                'ext': 'mp3',
                'thumbnail': r're:^https?://.+\.jpg',
                'subtitles': {
                    'origin': [{
                        'ext': 'lrc',
                    }]
                },
                'duration': 255,
                'track': 'Xa Mãi Xa',
                'artist': 'Bảo Thy',
                'album': 'Special Album',
                'album_artist': 'Bảo Thy',
            },
        },
        {
            'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
            'md5': 'c7f23d971ac1a4f675456ed13c9b9612',
            'info_dict': {
                'id': 'ZO8ZF7C7',
                'title': 'Sương Hoa Đưa Lối',
                'ext': 'mp4',
                'thumbnail': r're:^https?://.+\.jpg',
                'duration': 207,
                'track': 'Sương Hoa Đưa Lối',
                'artist': 'K-ICM, RYO',
                'album': 'Sương Hoa Đưa Lối (Single)',
                'album_artist': 'K-ICM, RYO',
            },
        },
        {
            'url': 'https://zingmp3.vn/bai-hat/Nguoi-Yeu-Toi-Lanh-Lung-Sat-Da-Mr-Siro/ZZ6IW7OU.html',
            'md5': '3e9f7a9bd0d965573dbff8d7c68b629d',
            'info_dict': {
                'id': 'ZZ6IW7OU',
                'title': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
                'ext': 'mp3',
                'thumbnail': r're:^https?://.+\.jpg',
                'duration': 303,
                'track': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
                'artist': 'Mr. Siro',
                'album': 'Người Yêu Tôi Lạnh Lùng Sắt Đá (Single)',
                'album_artist': 'Mr. Siro',
            },
        },
        {
            'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false',
            'only_matching': True,
        },
        {
            'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
            'only_matching': True,
        },
    ]

    def _process_data(self, data, song_id, type_url):
        """Turn the API data of a single item into its info dict.

        Missing formats are treated as an error (fatal extraction).
        """
        return self._extract_item(data, song_id, type_url, fatal=True)
1418a043 188
189
190class ZingMp3AlbumIE(ZingMp3BaseIE):
191 _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'album|playlist'
192 _TESTS = [{
43abd799
S
193 'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
194 'info_dict': {
195 '_type': 'playlist',
196 'id': 'ZWZBWDAF',
1418a043 197 'title': 'Lâu Đài Tình Ái',
c66bdc48 198 },
ecca4519 199 'playlist_count': 9,
63b2f88b
HTL
200 }, {
201 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html',
202 'info_dict': {
203 '_type': 'playlist',
204 'id': 'ZWZAEZZD',
205 'title': 'Những Bài Hát Hay Nhất Của Mr. Siro',
206 },
207 'playlist_count': 49,
43abd799
S
208 }, {
209 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
210 'only_matching': True,
1418a043 211 }, {
212 'url': 'https://zingmp3.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
213 'only_matching': True,
43abd799 214 }]
1418a043 215 IE_NAME = 'zingmp3:album'
216
ecca4519 217 def _process_data(self, data, song_id, type_url):
1418a043 218 def entries():
ecca4519
HTL
219 for item in traverse_obj(data, ('song', 'items')) or []:
220 entry = self._extract_item(item, song_id, type_url, False)
1418a043 221 if entry:
222 yield entry
ecca4519
HTL
223
224 return self.playlist_result(entries(), traverse_obj(data, 'id', 'encodeId'),
225 traverse_obj(data, 'name', 'title'))