]>
Commit | Line | Data |
---|---|---|
68a9a450 | 1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | ExtractorError, | |
4 | js_to_json, | |
5 | smuggle_url, | |
6 | str_or_none, | |
7 | traverse_obj, | |
8 | unescapeHTML, | |
9 | ) | |
10 | ||
11 | ||
class GeniusIE(InfoExtractor):
    """Extractor for genius.com video pages; hands off to the BrightcoveNew extractor."""

    _VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)'
    _TESTS = [{
        'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
        'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
        'info_dict': {
            'id': '6313303597112',
            'ext': 'mp4',
            'title': 'Vince Staples Breaks Down The Meaning Of “When Sparks Fly”',
            'description': 'md5:bc15e00342c537c0039d414423ae5752',
            'tags': 'count:1',
            'uploader_id': '4863540648001',
            'duration': 388.416,
            'upload_date': '20221005',
            'timestamp': 1664982341,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://genius.com/videos/Breaking-down-drakes-certified-lover-boy-kanye-beef-way-2-sexy-cudi',
        'md5': 'b8ed87a5efd1473bd027c20a969d4060',
        'info_dict': {
            'id': '6271792014001',
            'ext': 'mp4',
            'title': 'md5:c6355f7fa8a70bc86492a3963919fc15',
            'description': 'md5:1774638c31548b31b037c09e9b821393',
            'tags': 'count:3',
            'uploader_id': '4863540648001',
            'duration': 2685.099,
            'upload_date': '20210909',
            'timestamp': 1631209167,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Locate the Brightcove video id on the page and delegate to BrightcoveNew.

        Raises ExtractorError when no Brightcove video id can be found in the
        page metadata.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The page metadata is HTML-escaped JSON stored in a <meta content="..."> attribute.
        page_data = self._search_json(
            r'<meta content="', page, 'metadata', slug, transform_source=unescapeHTML)

        def is_brightcove_id_entry(_, entry):
            return entry['name'] == 'brightcove_video_id'

        # Two candidate locations, tried in order; the first hit wins.
        id_paths = (
            ('video', 'provider_id'),
            ('dfp_kv', is_brightcove_id_entry, 'values', 0),
        )
        video_id = traverse_obj(page_data, *id_paths, get_all=False)
        if not video_id:
            raise ExtractorError('Brightcove video id not found in webpage')

        # APP_CONFIG is optional: fall back to the hard-coded account/player ids below.
        app_config = self._search_json(
            r'var\s*APP_CONFIG\s*=', page, 'config', video_id, default={})
        account_id = app_config.get('brightcove_account_id', '4863540648001')
        player_keys = (
            'brightcove_standard_web_player_id',
            'brightcove_standard_no_autoplay_web_player_id',
            'brightcove_modal_web_player_id',
            'brightcove_song_story_web_player_id',
        )
        player_id = traverse_obj(app_config, *player_keys, default='S1ZcmcOC1x')

        embed_url = smuggle_url(
            f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
            {'referrer': url})
        return self.url_result(embed_url, 'BrightcoveNew', video_id)
68 | ||
69 | ||
class GeniusLyricsIE(InfoExtractor):
    """Extractor for genius.com lyrics pages; returns the song's media embeds as a playlist."""

    # `-lyrics` must be followed by a delimiter or the end of the URL. The
    # previous optional `[?/#]?` constrained nothing, so slugs that merely
    # contain `-lyrics` (e.g. `/Foo-lyrics-translations`) were claimed too.
    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics(?:[?/#]|$)'
    _TESTS = [{
        'url': 'https://genius.com/Lil-baby-heyy-lyrics',
        'playlist_mincount': 2,
        'info_dict': {
            'id': '8454545',
            'title': 'Heyy',
            'description': 'Heyy by Lil Baby',
        },
    }, {
        'url': 'https://genius.com/Outkast-two-dope-boyz-in-a-cadillac-lyrics',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '36239',
            'title': 'Two Dope Boyz (In a Cadillac)',
            'description': 'Two Dope Boyz (In a Cadillac) by OutKast',
        },
    }, {
        'url': 'https://genius.com/Playboi-carti-rip-lyrics',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '3710582',
            'title': 'R.I.P.',
            'description': 'R.I.P. by Playboi Carti',
        },
    }]

    def _real_extract(self, url):
        """Build a playlist from the video/audio media attached to a song page.

        Spotify embeds are skipped (with a screen message). Raises
        ExtractorError when the song id is missing from the preloaded state.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The state is passed to JSON.parse() as a JS string literal, so the
        # first pass (js_to_json) yields a JSON *string* that is parsed again.
        json_string = self._search_json(
            r'window\.__PRELOADED_STATE__\s*=\s*JSON\.parse\(', webpage, 'json string',
            display_id, transform_source=js_to_json, contains_pattern=r'\'{(?s:.+)}\'')
        song_info = self._parse_json(json_string, display_id)

        # Stringified because it is used as a key into `entities.songs` below.
        song_id = str_or_none(traverse_obj(song_info, ('songPage', 'song')))
        if not song_id:
            raise ExtractorError('Song id not found in webpage')

        title = traverse_obj(
            song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Title', 'value'),
            get_all=False, default='untitled')
        artist = traverse_obj(
            song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Primary Artist', 'value'),
            get_all=False, default='unknown artist')
        media = traverse_obj(
            song_info, ('entities', 'songs', song_id, 'media'), expected_type=list, default=[])

        entries = []
        for m in media:
            # Only playable media that carries a URL is of interest.
            if m.get('type') not in ('video', 'audio') or not m.get('url'):
                continue
            if m.get('provider') == 'spotify':
                self.to_screen(f'{song_id}: Skipping Spotify audio embed')
                continue
            entries.append(self.url_result(m['url']))

        return self.playlist_result(entries, song_id, title, f'{title} by {artist}')