]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/streetvoice.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / streetvoice.py
CommitLineData
c8dc41a6 1from .common import InfoExtractor
29f7c58a 2from ..utils import (
3 int_or_none,
4 parse_iso8601,
5 str_or_none,
6 strip_or_none,
7 try_get,
8 urljoin,
9)
c8dc41a6
YCH
10
11
class StreetVoiceIE(InfoExtractor):
    """Extractor for individual song pages on streetvoice.com.

    Song metadata and the per-format file URLs are both requested from the
    site's ``/api/v4/song/<id>/`` JSON endpoints.
    """

    _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://streetvoice.com/skippylu/songs/123688/',
        'md5': '0eb535970629a5195685355f3ed60bfd',
        'info_dict': {
            'id': '123688',
            'ext': 'mp3',
            'title': '流浪',
            'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 270,
            'upload_date': '20100923',
            'uploader': 'Crispy脆樂團',
            'uploader_id': '627810',
            'uploader_url': 're:^https?://streetvoice.com/skippylu/',
            'timestamp': 1285261661,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'repost_count': int,
            'track': '流浪',
            'track_id': '123688',
            'album': '2010',
        }
    }, {
        'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the song that *url* points at.

        The song metadata request is mandatory (``song['name']`` must be
        present); each of the three format URL requests is non-fatal and a
        format is simply omitted when its response carries no ``file`` value.
        """
        song_id = self._match_id(url)
        api_root = 'https://streetvoice.com/api/v4/song/%s/' % song_id
        song = self._download_json(api_root, song_id, query={
            'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
        })
        title = song['name']

        # (API path suffix, format_id) pairs, requested in this order.
        endpoints = (
            ('hls/file', 'hls'),
            ('file', 'http'),
            ('file/original', 'original'),
        )

        formats = []
        for suffix, format_id in endpoints:
            # The format endpoints are requested with an empty body (data=b'').
            response = self._download_json(
                api_root + suffix + '/', song_id,
                'Downloading %s format URL' % format_id,
                data=b'', fatal=False)
            media_url = (response or {}).get('file')
            if media_url:
                fmt = {
                    'ext': 'mp3',
                    'format_id': format_id,
                    'url': media_url,
                    'vcodec': 'none',
                }
                if format_id == 'hls':
                    fmt['protocol'] = 'm3u8_native'
                # The bitrate, when present, is read from a ".mp3.<N>k"
                # fragment of the file URL.
                bitrate = self._search_regex(
                    r'\.mp3\.(\d+)k', media_url, 'bitrate', default=None)
                if bitrate:
                    bitrate = int(bitrate)
                    fmt['abr'] = bitrate
                    fmt['tbr'] = bitrate
                formats.append(fmt)

        user = song.get('user') or {}
        username = user.get('username')

        def count_of(kind):
            # Counters are stored in the song object as "<kind>_count".
            return int_or_none(song.get(kind + '_count'))

        uploader_url = None
        if username:
            uploader_url = urljoin(url, '/%s/' % username)

        return {
            'id': song_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(song.get('synopsis')),
            'thumbnail': song.get('image'),
            'duration': int_or_none(song.get('length')),
            'timestamp': parse_iso8601(song.get('created_at')),
            'uploader': try_get(user, lambda x: x['profile']['nickname']),
            'uploader_id': str_or_none(user.get('id')),
            'uploader_url': uploader_url,
            'view_count': count_of('plays'),
            'like_count': count_of('likes'),
            'comment_count': count_of('comments'),
            'repost_count': count_of('share'),
            'track': title,
            'track_id': song_id,
            'album': try_get(song, lambda x: x['album']['name']),
        }